[llvm] [AMDGPU] Change SGPR layout to striped caller/callee saved (PR #127353)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 09:49:04 PST 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/127353
>From 50bdd130ec56864afccdd99b405f28249d148dbe Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 18 Feb 2025 11:50:12 -0500
Subject: [PATCH 1/2] [AMDGPU] Change SGPR layout to striped caller/callee
saved
This PR updates the SGPR layout to a striped caller/callee-saved design, similar
to the VGPR layout. The stripe width is set to 8.
Fixes #113782.
---
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 6 +-
.../amdgpu-simplify-libcall-pow-codegen.ll | 290 +-
llvm/test/CodeGen/AMDGPU/bf16.ll | 335 +-
...der-no-live-segment-at-def-implicit-def.ll | 42 +-
.../branch-folding-implicit-def-subreg.ll | 404 +-
.../test/CodeGen/AMDGPU/branch-relax-spill.ll | 213 +-
...l-args-inreg-no-sgpr-for-csrspill-xfail.ll | 4 +-
llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 12 +-
.../CodeGen/AMDGPU/call-argument-types.ll | 2512 +++++------
.../AMDGPU/call-preserved-registers.ll | 34 +-
.../test/CodeGen/AMDGPU/callee-frame-setup.ll | 2337 ++++-------
.../AMDGPU/csr-sgpr-spill-live-ins.mir | 10 +-
llvm/test/CodeGen/AMDGPU/ds_read2.ll | 36 +-
.../AMDGPU/dwarf-multi-register-use-crash.ll | 72 +-
.../eliminate-frame-index-s-mov-b32.mir | 53 +-
.../CodeGen/AMDGPU/function-args-inreg.ll | 4 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 10 +-
.../CodeGen/AMDGPU/gfx-call-non-gfx-func.ll | 68 +-
.../AMDGPU/gfx-callable-argument-types.ll | 288 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 3668 ++++++++---------
.../AMDGPU/global_atomics_scan_fmax.ll | 3108 +++++++-------
.../AMDGPU/global_atomics_scan_fmin.ll | 3108 +++++++-------
.../AMDGPU/global_atomics_scan_fsub.ll | 3668 ++++++++---------
.../greedy-alloc-fail-sgpr1024-spill.mir | 126 +-
.../identical-subrange-spill-infloop.ll | 146 +-
llvm/test/CodeGen/AMDGPU/indirect-call.ll | 1240 +++---
llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +-
.../llvm.amdgcn.pops.exiting.wave.id.ll | 48 +-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 45 +-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 81 +-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 45 +-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 81 +-
.../lower-work-group-id-intrinsics-hsa.ll | 64 +-
...ne-sink-temporal-divergence-swdev407790.ll | 320 +-
.../materialize-frame-index-sgpr.gfx10.ll | 842 +---
.../AMDGPU/materialize-frame-index-sgpr.ll | 1511 ++-----
...-knownbits-assign-crash-gh-issue-110930.ll | 26 +-
.../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 86 +-
.../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 56 +-
.../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 30 +-
.../AMDGPU/promote-constOffset-to-imm.ll | 446 +-
.../ran-out-of-sgprs-allocation-failure.mir | 206 +-
.../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 8 +-
llvm/test/CodeGen/AMDGPU/select.f16.ll | 15 +-
.../sgpr-spill-update-only-slot-indexes.ll | 16 +-
.../AMDGPU/shufflevector.v2i64.v8i64.ll | 2240 +++-------
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 240 +-
.../AMDGPU/snippet-copy-bundle-regression.mir | 55 +-
.../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 38 +-
.../AMDGPU/spill-sgpr-used-for-exec-copy.mir | 11 +-
.../spill_more_than_wavesize_csr_sgprs.ll | 396 +-
.../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 200 +-
...tack-pointer-offset-relative-frameindex.ll | 22 +-
llvm/test/CodeGen/AMDGPU/stack-realign.ll | 20 +-
.../AMDGPU/tuple-allocation-failure.ll | 333 +-
.../unallocatable-bundle-regression.mir | 22 +-
.../AMDGPU/unstructured-cfg-def-use-issue.ll | 212 +-
.../CodeGen/AMDGPU/use_restore_frame_reg.mir | 76 +-
.../AMDGPU/vgpr-large-tuple-alloc-error.ll | 352 +-
.../CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir | 3 +-
60 files changed, 12951 insertions(+), 16991 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 80969fce3d77f..e3861a7d06c3d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -91,7 +91,11 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
>;
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
- (sequence "SGPR%u", 30, 105)
+ (add (sequence "SGPR%u", 30, 37),
+ (sequence "SGPR%u", 46, 53),
+ (sequence "SGPR%u", 62, 69),
+ (sequence "SGPR%u", 78, 85),
+ (sequence "SGPR%u", 94, 105))
>;
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index ab2363860af9d..905d0deacab35 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -125,35 +125,35 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
+; CHECK-NEXT: v_writelane_b32 v43, s46, 6
+; CHECK-NEXT: v_writelane_b32 v43, s47, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s40, 8
-; CHECK-NEXT: v_writelane_b32 v43, s41, 9
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v43, s48, 8
+; CHECK-NEXT: v_writelane_b32 v43, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s42, 10
+; CHECK-NEXT: v_writelane_b32 v43, s50, 10
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s43, 11
+; CHECK-NEXT: v_writelane_b32 v43, s51, 11
; CHECK-NEXT: v_mov_b32_e32 v42, v1
-; CHECK-NEXT: v_writelane_b32 v43, s44, 12
+; CHECK-NEXT: v_writelane_b32 v43, s52, 12
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: v_writelane_b32 v43, s45, 13
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: v_writelane_b32 v43, s53, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: v_mov_b32_e32 v41, v2
-; CHECK-NEXT: s_mov_b32 s42, s15
-; CHECK-NEXT: s_mov_b32 s43, s14
-; CHECK-NEXT: s_mov_b32 s44, s13
-; CHECK-NEXT: s_mov_b32 s45, s12
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -161,15 +161,15 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s45
-; CHECK-NEXT: s_mov_b32 s13, s44
-; CHECK-NEXT: s_mov_b32 s14, s43
-; CHECK-NEXT: s_mov_b32 s15, s42
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -179,14 +179,14 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
-; CHECK-NEXT: v_readlane_b32 s45, v43, 13
-; CHECK-NEXT: v_readlane_b32 s44, v43, 12
-; CHECK-NEXT: v_readlane_b32 s43, v43, 11
-; CHECK-NEXT: v_readlane_b32 s42, v43, 10
-; CHECK-NEXT: v_readlane_b32 s41, v43, 9
-; CHECK-NEXT: v_readlane_b32 s40, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
+; CHECK-NEXT: v_readlane_b32 s53, v43, 13
+; CHECK-NEXT: v_readlane_b32 s52, v43, 12
+; CHECK-NEXT: v_readlane_b32 s51, v43, 11
+; CHECK-NEXT: v_readlane_b32 s50, v43, 10
+; CHECK-NEXT: v_readlane_b32 s49, v43, 9
+; CHECK-NEXT: v_readlane_b32 s48, v43, 8
+; CHECK-NEXT: v_readlane_b32 s47, v43, 7
+; CHECK-NEXT: v_readlane_b32 s46, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -266,34 +266,34 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
+; CHECK-NEXT: v_writelane_b32 v43, s46, 6
+; CHECK-NEXT: v_writelane_b32 v43, s47, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s40, 8
-; CHECK-NEXT: v_writelane_b32 v43, s41, 9
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v43, s48, 8
+; CHECK-NEXT: v_writelane_b32 v43, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s42, 10
-; CHECK-NEXT: v_writelane_b32 v43, s43, 11
-; CHECK-NEXT: v_writelane_b32 v43, s44, 12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: v_writelane_b32 v43, s50, 10
+; CHECK-NEXT: v_writelane_b32 v43, s51, 11
+; CHECK-NEXT: v_writelane_b32 v43, s52, 12
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s45, 13
+; CHECK-NEXT: v_writelane_b32 v43, s53, 13
; CHECK-NEXT: v_mov_b32_e32 v42, v31
; CHECK-NEXT: v_mov_b32_e32 v41, v3
; CHECK-NEXT: v_mov_b32_e32 v40, v2
-; CHECK-NEXT: s_mov_b32 s42, s15
-; CHECK-NEXT: s_mov_b32 s43, s14
-; CHECK-NEXT: s_mov_b32 s44, s13
-; CHECK-NEXT: s_mov_b32 s45, s12
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1]
@@ -301,28 +301,28 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s45
-; CHECK-NEXT: s_mov_b32 s13, s44
-; CHECK-NEXT: s_mov_b32 s14, s43
-; CHECK-NEXT: s_mov_b32 s15, s42
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v42
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: v_readlane_b32 s45, v43, 13
-; CHECK-NEXT: v_readlane_b32 s44, v43, 12
-; CHECK-NEXT: v_readlane_b32 s43, v43, 11
-; CHECK-NEXT: v_readlane_b32 s42, v43, 10
-; CHECK-NEXT: v_readlane_b32 s41, v43, 9
-; CHECK-NEXT: v_readlane_b32 s40, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
+; CHECK-NEXT: v_readlane_b32 s53, v43, 13
+; CHECK-NEXT: v_readlane_b32 s52, v43, 12
+; CHECK-NEXT: v_readlane_b32 s51, v43, 11
+; CHECK-NEXT: v_readlane_b32 s50, v43, 10
+; CHECK-NEXT: v_readlane_b32 s49, v43, 9
+; CHECK-NEXT: v_readlane_b32 s48, v43, 8
+; CHECK-NEXT: v_readlane_b32 s47, v43, 7
+; CHECK-NEXT: v_readlane_b32 s46, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -409,35 +409,35 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
+; CHECK-NEXT: v_writelane_b32 v43, s46, 6
+; CHECK-NEXT: v_writelane_b32 v43, s47, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s40, 8
-; CHECK-NEXT: v_writelane_b32 v43, s41, 9
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v43, s48, 8
+; CHECK-NEXT: v_writelane_b32 v43, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s42, 10
+; CHECK-NEXT: v_writelane_b32 v43, s50, 10
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s43, 11
+; CHECK-NEXT: v_writelane_b32 v43, s51, 11
; CHECK-NEXT: v_mov_b32_e32 v42, v1
-; CHECK-NEXT: v_writelane_b32 v43, s44, 12
+; CHECK-NEXT: v_writelane_b32 v43, s52, 12
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: v_writelane_b32 v43, s45, 13
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: v_writelane_b32 v43, s53, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: v_mov_b32_e32 v41, v2
-; CHECK-NEXT: s_mov_b32 s42, s15
-; CHECK-NEXT: s_mov_b32 s43, s14
-; CHECK-NEXT: s_mov_b32 s44, s13
-; CHECK-NEXT: s_mov_b32 s45, s12
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -445,15 +445,15 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s45
-; CHECK-NEXT: s_mov_b32 s13, s44
-; CHECK-NEXT: s_mov_b32 s14, s43
-; CHECK-NEXT: s_mov_b32 s15, s42
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -463,14 +463,14 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
-; CHECK-NEXT: v_readlane_b32 s45, v43, 13
-; CHECK-NEXT: v_readlane_b32 s44, v43, 12
-; CHECK-NEXT: v_readlane_b32 s43, v43, 11
-; CHECK-NEXT: v_readlane_b32 s42, v43, 10
-; CHECK-NEXT: v_readlane_b32 s41, v43, 9
-; CHECK-NEXT: v_readlane_b32 s40, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
+; CHECK-NEXT: v_readlane_b32 s53, v43, 13
+; CHECK-NEXT: v_readlane_b32 s52, v43, 12
+; CHECK-NEXT: v_readlane_b32 s51, v43, 11
+; CHECK-NEXT: v_readlane_b32 s50, v43, 10
+; CHECK-NEXT: v_readlane_b32 s49, v43, 9
+; CHECK-NEXT: v_readlane_b32 s48, v43, 8
+; CHECK-NEXT: v_readlane_b32 s47, v43, 7
+; CHECK-NEXT: v_readlane_b32 s46, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -552,32 +552,32 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: v_writelane_b32 v42, s35, 3
; CHECK-NEXT: v_writelane_b32 v42, s36, 4
; CHECK-NEXT: v_writelane_b32 v42, s37, 5
-; CHECK-NEXT: v_writelane_b32 v42, s38, 6
-; CHECK-NEXT: v_writelane_b32 v42, s39, 7
+; CHECK-NEXT: v_writelane_b32 v42, s46, 6
+; CHECK-NEXT: v_writelane_b32 v42, s47, 7
; CHECK-NEXT: s_addk_i32 s32, 0x400
-; CHECK-NEXT: v_writelane_b32 v42, s40, 8
-; CHECK-NEXT: v_writelane_b32 v42, s41, 9
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v42, s48, 8
+; CHECK-NEXT: v_writelane_b32 v42, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v42, s42, 10
-; CHECK-NEXT: v_writelane_b32 v42, s43, 11
-; CHECK-NEXT: v_writelane_b32 v42, s44, 12
+; CHECK-NEXT: v_writelane_b32 v42, s50, 10
+; CHECK-NEXT: v_writelane_b32 v42, s51, 11
+; CHECK-NEXT: v_writelane_b32 v42, s52, 12
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v42, s45, 13
+; CHECK-NEXT: v_writelane_b32 v42, s53, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: s_mov_b32 s42, s15
-; CHECK-NEXT: s_mov_b32 s43, s14
-; CHECK-NEXT: s_mov_b32 s44, s13
-; CHECK-NEXT: s_mov_b32 s45, s12
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -586,28 +586,28 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s45
-; CHECK-NEXT: s_mov_b32 s13, s44
-; CHECK-NEXT: s_mov_b32 s14, s43
-; CHECK-NEXT: s_mov_b32 s15, s42
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: v_readlane_b32 s45, v42, 13
-; CHECK-NEXT: v_readlane_b32 s44, v42, 12
-; CHECK-NEXT: v_readlane_b32 s43, v42, 11
-; CHECK-NEXT: v_readlane_b32 s42, v42, 10
-; CHECK-NEXT: v_readlane_b32 s41, v42, 9
-; CHECK-NEXT: v_readlane_b32 s40, v42, 8
-; CHECK-NEXT: v_readlane_b32 s39, v42, 7
-; CHECK-NEXT: v_readlane_b32 s38, v42, 6
+; CHECK-NEXT: v_readlane_b32 s53, v42, 13
+; CHECK-NEXT: v_readlane_b32 s52, v42, 12
+; CHECK-NEXT: v_readlane_b32 s51, v42, 11
+; CHECK-NEXT: v_readlane_b32 s50, v42, 10
+; CHECK-NEXT: v_readlane_b32 s49, v42, 9
+; CHECK-NEXT: v_readlane_b32 s48, v42, 8
+; CHECK-NEXT: v_readlane_b32 s47, v42, 7
+; CHECK-NEXT: v_readlane_b32 s46, v42, 6
; CHECK-NEXT: v_readlane_b32 s37, v42, 5
; CHECK-NEXT: v_readlane_b32 s36, v42, 4
; CHECK-NEXT: v_readlane_b32 s35, v42, 3
@@ -694,34 +694,34 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s38, 6
-; CHECK-NEXT: v_writelane_b32 v43, s39, 7
+; CHECK-NEXT: v_writelane_b32 v43, s46, 6
+; CHECK-NEXT: v_writelane_b32 v43, s47, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
-; CHECK-NEXT: v_writelane_b32 v43, s40, 8
-; CHECK-NEXT: v_writelane_b32 v43, s41, 9
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v43, s48, 8
+; CHECK-NEXT: v_writelane_b32 v43, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: v_writelane_b32 v43, s42, 10
+; CHECK-NEXT: v_writelane_b32 v43, s50, 10
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
-; CHECK-NEXT: v_writelane_b32 v43, s43, 11
+; CHECK-NEXT: v_writelane_b32 v43, s51, 11
; CHECK-NEXT: v_mov_b32_e32 v41, v1
-; CHECK-NEXT: v_writelane_b32 v43, s44, 12
+; CHECK-NEXT: v_writelane_b32 v43, s52, 12
; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v41
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: v_writelane_b32 v43, s45, 13
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: v_writelane_b32 v43, s53, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: s_mov_b32 s42, s15
-; CHECK-NEXT: s_mov_b32 s43, s14
-; CHECK-NEXT: s_mov_b32 s44, s13
-; CHECK-NEXT: s_mov_b32 s45, s12
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
; CHECK-NEXT: v_or_b32_e32 v42, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -730,15 +730,15 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s45
-; CHECK-NEXT: s_mov_b32 s13, s44
-; CHECK-NEXT: s_mov_b32 s14, s43
-; CHECK-NEXT: s_mov_b32 s15, s42
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -747,14 +747,14 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_or_b32_e32 v1, v2, v1
-; CHECK-NEXT: v_readlane_b32 s45, v43, 13
-; CHECK-NEXT: v_readlane_b32 s44, v43, 12
-; CHECK-NEXT: v_readlane_b32 s43, v43, 11
-; CHECK-NEXT: v_readlane_b32 s42, v43, 10
-; CHECK-NEXT: v_readlane_b32 s41, v43, 9
-; CHECK-NEXT: v_readlane_b32 s40, v43, 8
-; CHECK-NEXT: v_readlane_b32 s39, v43, 7
-; CHECK-NEXT: v_readlane_b32 s38, v43, 6
+; CHECK-NEXT: v_readlane_b32 s53, v43, 13
+; CHECK-NEXT: v_readlane_b32 s52, v43, 12
+; CHECK-NEXT: v_readlane_b32 s51, v43, 11
+; CHECK-NEXT: v_readlane_b32 s50, v43, 10
+; CHECK-NEXT: v_readlane_b32 s49, v43, 9
+; CHECK-NEXT: v_readlane_b32 s48, v43, 8
+; CHECK-NEXT: v_readlane_b32 s47, v43, 7
+; CHECK-NEXT: v_readlane_b32 s46, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 0382cc72a36ae..f36f5d5f00edc 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -37515,14 +37515,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GCN-LABEL: v_vselect_v16bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: s_waitcnt expcnt(0)
-; GCN-NEXT: v_writelane_b32 v31, s30, 0
-; GCN-NEXT: v_writelane_b32 v31, s31, 1
-; GCN-NEXT: v_writelane_b32 v31, s34, 2
-; GCN-NEXT: v_writelane_b32 v31, s35, 3
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: v_and_b32_e32 v0, 1, v1
@@ -37560,21 +37552,21 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GCN-NEXT: v_and_b32_e32 v8, 1, v14
; GCN-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v7
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32
-; GCN-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v8
+; GCN-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v8
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:64
; GCN-NEXT: v_and_b32_e32 v9, 1, v15
-; GCN-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v9
+; GCN-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v9
; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:60
; GCN-NEXT: s_waitcnt vmcnt(2)
; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[34:35]
+; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[40:41]
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:56
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v30
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[30:31]
+; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[38:39]
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v29
; GCN-NEXT: s_waitcnt vmcnt(1)
@@ -37650,14 +37642,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13
; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14
; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15
-; GCN-NEXT: v_readlane_b32 s35, v31, 3
-; GCN-NEXT: v_readlane_b32 s34, v31, 2
-; GCN-NEXT: v_readlane_b32 s31, v31, 1
-; GCN-NEXT: v_readlane_b32 s30, v31, 0
-; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_vselect_v16bf16:
@@ -37798,9 +37782,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX8-LABEL: v_vselect_v16bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v1
@@ -37826,17 +37807,13 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX8-NEXT: v_and_b32_e32 v0, 1, v11
; GFX8-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v12
-; GFX8-NEXT: v_writelane_b32 v31, s30, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v13
-; GFX8-NEXT: v_writelane_b32 v31, s31, 1
; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v14
-; GFX8-NEXT: v_writelane_b32 v31, s34, 2
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v15
-; GFX8-NEXT: v_writelane_b32 v31, s35, 3
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v22
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v30
; GFX8-NEXT: v_cndmask_b32_e64 v6, v1, v0, s[28:29]
@@ -37862,9 +37839,9 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX8-NEXT: v_or_b32_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v5, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[38:39]
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[40:41]
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v19
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v27
; GFX8-NEXT: v_cndmask_b32_e64 v3, v1, v0, s[16:17]
@@ -37887,14 +37864,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX8-NEXT: v_or_b32_sdwa v2, v13, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v7, v10, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_readlane_b32 s35, v31, 3
-; GFX8-NEXT: v_readlane_b32 s34, v31, 2
-; GFX8-NEXT: v_readlane_b32 s31, v31, 1
-; GFX8-NEXT: v_readlane_b32 s30, v31, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_vselect_v16bf16:
@@ -38898,108 +38867,78 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v34, s30, 0
-; GFX8-NEXT: v_writelane_b32 v34, s31, 1
-; GFX8-NEXT: v_writelane_b32 v34, s34, 2
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_writelane_b32 v34, s35, 3
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v1
-; GFX8-NEXT: v_writelane_b32 v34, s36, 4
; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX8-NEXT: v_writelane_b32 v34, s37, 5
; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v3
-; GFX8-NEXT: v_writelane_b32 v34, s38, 6
; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v4
-; GFX8-NEXT: v_writelane_b32 v34, s39, 7
; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v5
-; GFX8-NEXT: v_writelane_b32 v34, s40, 8
; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v6
-; GFX8-NEXT: v_writelane_b32 v34, s41, 9
; GFX8-NEXT: v_cmp_eq_u32_e64 s[14:15], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v7
-; GFX8-NEXT: v_writelane_b32 v34, s42, 10
; GFX8-NEXT: v_cmp_eq_u32_e64 s[16:17], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v8
-; GFX8-NEXT: v_writelane_b32 v34, s43, 11
; GFX8-NEXT: v_cmp_eq_u32_e64 s[18:19], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v9
-; GFX8-NEXT: v_writelane_b32 v34, s44, 12
; GFX8-NEXT: v_cmp_eq_u32_e64 s[20:21], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v10
-; GFX8-NEXT: v_writelane_b32 v34, s45, 13
; GFX8-NEXT: v_cmp_eq_u32_e64 s[22:23], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v11
-; GFX8-NEXT: v_writelane_b32 v34, s46, 14
; GFX8-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v12
-; GFX8-NEXT: v_writelane_b32 v34, s47, 15
; GFX8-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v13
-; GFX8-NEXT: v_writelane_b32 v34, s48, 16
; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v14
-; GFX8-NEXT: v_writelane_b32 v34, s49, 17
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v15
-; GFX8-NEXT: v_writelane_b32 v34, s50, 18
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v16
-; GFX8-NEXT: v_writelane_b32 v34, s51, 19
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v17
-; GFX8-NEXT: v_writelane_b32 v34, s52, 20
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v18
-; GFX8-NEXT: v_writelane_b32 v34, s53, 21
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v19
-; GFX8-NEXT: v_writelane_b32 v34, s54, 22
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v20
-; GFX8-NEXT: v_writelane_b32 v34, s55, 23
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v21
-; GFX8-NEXT: v_writelane_b32 v34, s56, 24
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v22
-; GFX8-NEXT: v_writelane_b32 v34, s57, 25
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[48:49], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v23
-; GFX8-NEXT: v_writelane_b32 v34, s58, 26
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[50:51], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v24
-; GFX8-NEXT: v_writelane_b32 v34, s59, 27
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[52:53], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v25
-; GFX8-NEXT: v_writelane_b32 v34, s60, 28
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v26
-; GFX8-NEXT: v_writelane_b32 v34, s61, 29
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v27
-; GFX8-NEXT: v_writelane_b32 v34, s62, 30
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
+; GFX8-NEXT: v_writelane_b32 v34, s30, 0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v28
-; GFX8-NEXT: v_writelane_b32 v34, s63, 31
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
+; GFX8-NEXT: v_writelane_b32 v34, s31, 1
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v29
-; GFX8-NEXT: v_writelane_b32 v34, s64, 32
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0
+; GFX8-NEXT: v_writelane_b32 v34, s34, 2
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v30
-; GFX8-NEXT: v_writelane_b32 v34, s65, 33
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[64:65], 1, v0
+; GFX8-NEXT: v_writelane_b32 v34, s35, 3
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32
-; GFX8-NEXT: v_writelane_b32 v34, s66, 34
-; GFX8-NEXT: v_writelane_b32 v34, s67, 35
+; GFX8-NEXT: v_writelane_b32 v34, s36, 4
+; GFX8-NEXT: v_writelane_b32 v34, s37, 5
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[66:67], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72
@@ -39036,40 +38975,40 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v29
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v28, 16, v32
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[66:67]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[64:65]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[36:37]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[34:35]
; GFX8-NEXT: v_lshrrev_b32_e32 v32, 16, v31
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v30
-; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[62:63]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[60:61]
+; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[90:91]
; GFX8-NEXT: v_lshrrev_b32_e32 v31, 16, v27
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v26
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[58:59]
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[56:57]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[88:89]
+; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87]
; GFX8-NEXT: v_lshrrev_b32_e32 v27, 16, v25
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v24
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[54:55]
-; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[52:53]
+; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[76:77]
+; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75]
; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v23
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v22
-; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[50:51]
-; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[48:49]
+; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[72:73]
+; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71]
; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v21
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v20
-; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[46:47]
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[44:45]
+; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[60:61]
+; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59]
; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v19
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v18
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[42:43]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[56:57]
+; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55]
; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v17
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v16
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[38:39]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[36:37]
+; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[44:45]
+; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43]
; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v15
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v14
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[34:35]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39]
; GFX8-NEXT: v_lshrrev_b32_e32 v15, 16, v13
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v12
; GFX8-NEXT: v_cndmask_b32_e64 v15, v33, v15, s[28:29]
@@ -39130,36 +39069,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX8-NEXT: v_or_b32_sdwa v13, v26, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v14, v30, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v15, v29, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_readlane_b32 s67, v34, 35
-; GFX8-NEXT: v_readlane_b32 s66, v34, 34
-; GFX8-NEXT: v_readlane_b32 s65, v34, 33
-; GFX8-NEXT: v_readlane_b32 s64, v34, 32
-; GFX8-NEXT: v_readlane_b32 s63, v34, 31
-; GFX8-NEXT: v_readlane_b32 s62, v34, 30
-; GFX8-NEXT: v_readlane_b32 s61, v34, 29
-; GFX8-NEXT: v_readlane_b32 s60, v34, 28
-; GFX8-NEXT: v_readlane_b32 s59, v34, 27
-; GFX8-NEXT: v_readlane_b32 s58, v34, 26
-; GFX8-NEXT: v_readlane_b32 s57, v34, 25
-; GFX8-NEXT: v_readlane_b32 s56, v34, 24
-; GFX8-NEXT: v_readlane_b32 s55, v34, 23
-; GFX8-NEXT: v_readlane_b32 s54, v34, 22
-; GFX8-NEXT: v_readlane_b32 s53, v34, 21
-; GFX8-NEXT: v_readlane_b32 s52, v34, 20
-; GFX8-NEXT: v_readlane_b32 s51, v34, 19
-; GFX8-NEXT: v_readlane_b32 s50, v34, 18
-; GFX8-NEXT: v_readlane_b32 s49, v34, 17
-; GFX8-NEXT: v_readlane_b32 s48, v34, 16
-; GFX8-NEXT: v_readlane_b32 s47, v34, 15
-; GFX8-NEXT: v_readlane_b32 s46, v34, 14
-; GFX8-NEXT: v_readlane_b32 s45, v34, 13
-; GFX8-NEXT: v_readlane_b32 s44, v34, 12
-; GFX8-NEXT: v_readlane_b32 s43, v34, 11
-; GFX8-NEXT: v_readlane_b32 s42, v34, 10
-; GFX8-NEXT: v_readlane_b32 s41, v34, 9
-; GFX8-NEXT: v_readlane_b32 s40, v34, 8
-; GFX8-NEXT: v_readlane_b32 s39, v34, 7
-; GFX8-NEXT: v_readlane_b32 s38, v34, 6
; GFX8-NEXT: v_readlane_b32 s37, v34, 5
; GFX8-NEXT: v_readlane_b32 s36, v34, 4
; GFX8-NEXT: v_readlane_b32 s35, v34, 3
@@ -39178,108 +39087,76 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v33, s30, 0
-; GFX9-NEXT: v_writelane_b32 v33, s31, 1
-; GFX9-NEXT: v_writelane_b32 v33, s34, 2
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_writelane_b32 v33, s35, 3
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v3
-; GFX9-NEXT: v_writelane_b32 v33, s36, 4
; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX9-NEXT: v_writelane_b32 v33, s37, 5
; GFX9-NEXT: v_cmp_eq_u32_e64 s[8:9], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v5
-; GFX9-NEXT: v_writelane_b32 v33, s38, 6
; GFX9-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v4
-; GFX9-NEXT: v_writelane_b32 v33, s39, 7
; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v7
-; GFX9-NEXT: v_writelane_b32 v33, s40, 8
; GFX9-NEXT: v_cmp_eq_u32_e64 s[14:15], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v6
-; GFX9-NEXT: v_writelane_b32 v33, s41, 9
; GFX9-NEXT: v_cmp_eq_u32_e64 s[16:17], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v9
-; GFX9-NEXT: v_writelane_b32 v33, s42, 10
; GFX9-NEXT: v_cmp_eq_u32_e64 s[18:19], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v8
-; GFX9-NEXT: v_writelane_b32 v33, s43, 11
; GFX9-NEXT: v_cmp_eq_u32_e64 s[20:21], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v11
-; GFX9-NEXT: v_writelane_b32 v33, s44, 12
; GFX9-NEXT: v_cmp_eq_u32_e64 s[22:23], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v10
-; GFX9-NEXT: v_writelane_b32 v33, s45, 13
; GFX9-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v13
-; GFX9-NEXT: v_writelane_b32 v33, s46, 14
; GFX9-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v12
-; GFX9-NEXT: v_writelane_b32 v33, s47, 15
; GFX9-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v15
-; GFX9-NEXT: v_writelane_b32 v33, s48, 16
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v14
-; GFX9-NEXT: v_writelane_b32 v33, s49, 17
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v17
-; GFX9-NEXT: v_writelane_b32 v33, s50, 18
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v16
-; GFX9-NEXT: v_writelane_b32 v33, s51, 19
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v19
-; GFX9-NEXT: v_writelane_b32 v33, s52, 20
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v18
-; GFX9-NEXT: v_writelane_b32 v33, s53, 21
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v21
-; GFX9-NEXT: v_writelane_b32 v33, s54, 22
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v20
-; GFX9-NEXT: v_writelane_b32 v33, s55, 23
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v23
-; GFX9-NEXT: v_writelane_b32 v33, s56, 24
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[48:49], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v22
-; GFX9-NEXT: v_writelane_b32 v33, s57, 25
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[50:51], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v25
-; GFX9-NEXT: v_writelane_b32 v33, s58, 26
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[52:53], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v24
-; GFX9-NEXT: v_writelane_b32 v33, s59, 27
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v27
-; GFX9-NEXT: v_writelane_b32 v33, s60, 28
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v26
-; GFX9-NEXT: v_writelane_b32 v33, s61, 29
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v29
-; GFX9-NEXT: v_writelane_b32 v33, s62, 30
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v28
-; GFX9-NEXT: v_writelane_b32 v33, s63, 31
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[92:93], 1, v0
; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32
-; GFX9-NEXT: v_writelane_b32 v33, s64, 32
-; GFX9-NEXT: v_writelane_b32 v33, s65, 33
-; GFX9-NEXT: v_writelane_b32 v33, s66, 34
+; GFX9-NEXT: v_writelane_b32 v33, s30, 0
+; GFX9-NEXT: v_writelane_b32 v33, s31, 1
+; GFX9-NEXT: v_writelane_b32 v33, s34, 2
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX9-NEXT: v_writelane_b32 v33, s67, 35
+; GFX9-NEXT: v_writelane_b32 v33, s35, 3
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[64:65], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v30
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[66:67], 1, v0
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72
@@ -39313,42 +39190,42 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:128
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_cndmask_b32_e64 v29, v31, v32, s[66:67]
+; GFX9-NEXT: v_cndmask_b32_e64 v29, v31, v32, s[34:35]
; GFX9-NEXT: v_lshrrev_b32_e32 v32, 16, v32
; GFX9-NEXT: v_lshrrev_b32_e32 v31, 16, v31
-; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[64:65]
-; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[62:63]
+; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[92:93]
; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v30
; GFX9-NEXT: v_lshrrev_b32_e32 v28, 16, v28
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[60:61]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[58:59]
+; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[90:91]
+; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[88:89]
; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v27
; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v26
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[56:57]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[54:55]
+; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87]
+; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[76:77]
; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v25
; GFX9-NEXT: v_lshrrev_b32_e32 v24, 16, v24
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[52:53]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[50:51]
+; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75]
+; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[72:73]
; GFX9-NEXT: v_lshrrev_b32_e32 v23, 16, v23
; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v22
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[48:49]
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[46:47]
+; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71]
+; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[60:61]
; GFX9-NEXT: v_lshrrev_b32_e32 v21, 16, v21
; GFX9-NEXT: v_lshrrev_b32_e32 v20, 16, v20
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[44:45]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[42:43]
+; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59]
+; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[56:57]
; GFX9-NEXT: v_lshrrev_b32_e32 v19, 16, v19
; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v18
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[40:41]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[38:39]
+; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55]
+; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[44:45]
; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v17
; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[36:37]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[34:35]
+; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43]
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[40:41]
; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[30:31]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39]
; GFX9-NEXT: v_cndmask_b32_e64 v15, v12, v13, s[28:29]
; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
@@ -39394,38 +39271,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX9-NEXT: v_perm_b32 v13, v26, v30, s4
; GFX9-NEXT: v_perm_b32 v14, v28, v32, s4
; GFX9-NEXT: v_perm_b32 v15, v31, v29, s4
-; GFX9-NEXT: v_readlane_b32 s67, v33, 35
-; GFX9-NEXT: v_readlane_b32 s66, v33, 34
-; GFX9-NEXT: v_readlane_b32 s65, v33, 33
-; GFX9-NEXT: v_readlane_b32 s64, v33, 32
-; GFX9-NEXT: v_readlane_b32 s63, v33, 31
-; GFX9-NEXT: v_readlane_b32 s62, v33, 30
-; GFX9-NEXT: v_readlane_b32 s61, v33, 29
-; GFX9-NEXT: v_readlane_b32 s60, v33, 28
-; GFX9-NEXT: v_readlane_b32 s59, v33, 27
-; GFX9-NEXT: v_readlane_b32 s58, v33, 26
-; GFX9-NEXT: v_readlane_b32 s57, v33, 25
-; GFX9-NEXT: v_readlane_b32 s56, v33, 24
-; GFX9-NEXT: v_readlane_b32 s55, v33, 23
-; GFX9-NEXT: v_readlane_b32 s54, v33, 22
-; GFX9-NEXT: v_readlane_b32 s53, v33, 21
-; GFX9-NEXT: v_readlane_b32 s52, v33, 20
-; GFX9-NEXT: v_readlane_b32 s51, v33, 19
-; GFX9-NEXT: v_readlane_b32 s50, v33, 18
-; GFX9-NEXT: v_readlane_b32 s49, v33, 17
-; GFX9-NEXT: v_readlane_b32 s48, v33, 16
-; GFX9-NEXT: v_readlane_b32 s47, v33, 15
-; GFX9-NEXT: v_readlane_b32 s46, v33, 14
-; GFX9-NEXT: v_readlane_b32 s45, v33, 13
-; GFX9-NEXT: v_readlane_b32 s44, v33, 12
-; GFX9-NEXT: v_readlane_b32 s43, v33, 11
-; GFX9-NEXT: v_readlane_b32 s42, v33, 10
-; GFX9-NEXT: v_readlane_b32 s41, v33, 9
-; GFX9-NEXT: v_readlane_b32 s40, v33, 8
-; GFX9-NEXT: v_readlane_b32 s39, v33, 7
-; GFX9-NEXT: v_readlane_b32 s38, v33, 6
-; GFX9-NEXT: v_readlane_b32 s37, v33, 5
-; GFX9-NEXT: v_readlane_b32 s36, v33, 4
; GFX9-NEXT: v_readlane_b32 s35, v33, 3
; GFX9-NEXT: v_readlane_b32 s34, v33, 2
; GFX9-NEXT: v_readlane_b32 s31, v33, 1
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index f9ffa5ae57f3e..85b9adfe6ea5c 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -9,24 +9,24 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx8 s[36:43], s[8:9], 0x0
+; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s12, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_cmp_lg_u32 s40, 0
+; CHECK-NEXT: s_cmp_lg_u32 s100, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
-; CHECK-NEXT: s_cmp_eq_u32 s42, 0
+; CHECK-NEXT: s_cmp_eq_u32 s102, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
-; CHECK-NEXT: s_cmp_lg_u32 s43, 0
+; CHECK-NEXT: s_cmp_lg_u32 s103, 0
; CHECK-NEXT: s_mov_b32 s17, 0
; CHECK-NEXT: s_cselect_b32 s12, -1, 0
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: s_mov_b32 s36, 0
+; CHECK-NEXT: s_mov_b32 s96, 0
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_6
; CHECK-NEXT: s_branch .LBB0_7
@@ -34,16 +34,16 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s14, s12
; CHECK-NEXT: s_mov_b32 s15, s12
; CHECK-NEXT: s_mov_b32 s13, s12
-; CHECK-NEXT: s_mov_b64 s[38:39], s[14:15]
-; CHECK-NEXT: s_mov_b64 s[36:37], s[12:13]
+; CHECK-NEXT: s_mov_b64 s[98:99], s[14:15]
+; CHECK-NEXT: s_mov_b64 s[96:97], s[12:13]
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
-; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s41, 0
-; CHECK-NEXT: s_mov_b32 s36, 1.0
+; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s101, 0
+; CHECK-NEXT: s_mov_b32 s96, 1.0
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
-; CHECK-NEXT: s_mov_b32 s37, s36
-; CHECK-NEXT: s_mov_b32 s38, s36
-; CHECK-NEXT: s_mov_b32 s39, s36
+; CHECK-NEXT: s_mov_b32 s97, s96
+; CHECK-NEXT: s_mov_b32 s98, s96
+; CHECK-NEXT: s_mov_b32 s99, s96
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
@@ -55,7 +55,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT: v_add_f32_e64 v1, s17, s36
+; CHECK-NEXT: v_add_f32_e64 v1, s17, s96
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT: s_mov_b32 s12, s14
@@ -65,13 +65,13 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: s_mov_b32 s13, s15
; CHECK-NEXT: s_mov_b32 s14, s16
-; CHECK-NEXT: s_mov_b32 s36, 0
+; CHECK-NEXT: s_mov_b32 s96, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
-; CHECK-NEXT: s_mov_b32 s37, s36
-; CHECK-NEXT: s_mov_b32 s38, s36
-; CHECK-NEXT: s_mov_b32 s39, s36
+; CHECK-NEXT: s_mov_b32 s97, s96
+; CHECK-NEXT: s_mov_b32 s98, s96
+; CHECK-NEXT: s_mov_b32 s99, s96
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
@@ -80,11 +80,11 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
-; CHECK-NEXT: v_mov_b32_e32 v0, s36
+; CHECK-NEXT: v_mov_b32_e32 v0, s96
; CHECK-NEXT: v_mov_b32_e32 v4, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, s37
-; CHECK-NEXT: v_mov_b32_e32 v2, s38
-; CHECK-NEXT: v_mov_b32_e32 v3, s39
+; CHECK-NEXT: v_mov_b32_e32 v1, s97
+; CHECK-NEXT: v_mov_b32_e32 v2, s98
+; CHECK-NEXT: v_mov_b32_e32 v3, s99
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; CHECK-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 98136347ab702..bce02a4cfacde 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 0, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
@@ -33,7 +33,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1.bb103:
; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -41,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr44, $sgpr45, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
@@ -54,7 +54,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3.Flow17:
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc
@@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.4.bb15:
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr4 = COPY renamable $sgpr25, implicit $exec
@@ -79,16 +79,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -111,7 +111,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr17, implicit $exec
; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr17, implicit $exec
@@ -124,15 +124,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.7.Flow19:
; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.62, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.8.Flow32:
; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr18_sgpr19, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -141,58 +141,58 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.9.bb89:
; GFX90A-NEXT: successors: %bb.10(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.10.Flow33:
; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
- ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr58_sgpr59, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.11.bb84:
; GFX90A-NEXT: successors: %bb.12(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.12.Flow34:
; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
- ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr54_sgpr55, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.13.bb79:
; GFX90A-NEXT: successors: %bb.14(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.14.Flow35:
; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
- ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.15.bb72:
; GFX90A-NEXT: successors: %bb.16(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc
@@ -202,122 +202,122 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr15
; GFX90A-NEXT: $sgpr14 = COPY killed renamable $sgpr16
; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr18_sgpr19, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.16.Flow36:
; GFX90A-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr66_sgpr67, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.18, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.17.bb67:
; GFX90A-NEXT: successors: %bb.18(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.18.Flow37:
; GFX90A-NEXT: successors: %bb.19(0x40000000), %bb.20(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr64_sgpr65, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.20, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.19.bb62:
; GFX90A-NEXT: successors: %bb.20(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.20.Flow38:
; GFX90A-NEXT: successors: %bb.21(0x40000000), %bb.22(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr62_sgpr63, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.22, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.21.bb54:
; GFX90A-NEXT: successors: %bb.22(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.22.Flow39:
; GFX90A-NEXT: successors: %bb.23(0x40000000), %bb.24(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.24, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.23.bb47:
; GFX90A-NEXT: successors: %bb.24(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.24.Flow40:
; GFX90A-NEXT: successors: %bb.25(0x40000000), %bb.26(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.26, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.25.bb40:
; GFX90A-NEXT: successors: %bb.26(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.26.Flow41:
; GFX90A-NEXT: successors: %bb.27(0x40000000), %bb.28(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.28, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.27.bb33:
; GFX90A-NEXT: successors: %bb.28(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.28.Flow42:
; GFX90A-NEXT: successors: %bb.34(0x40000000), %bb.29(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.34, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.29.Flow43:
; GFX90A-NEXT: successors: %bb.30(0x40000000), %bb.31(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc
@@ -325,17 +325,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.30.bb19:
; GFX90A-NEXT: successors: %bb.31(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.31.Flow44:
; GFX90A-NEXT: successors: %bb.32(0x40000000), %bb.33(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr56_sgpr57, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr68_sgpr69, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr56_sgpr57, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr68_sgpr69, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.33, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.32.UnifiedUnreachableBlock:
@@ -351,32 +351,32 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.34.bb26:
; GFX90A-NEXT: successors: %bb.29(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.29
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.35.bb20:
; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec :: (load (s8) from %ir.i21, addrspace 1)
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr43, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_LT_I16_e64 0, killed $vgpr0, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -400,19 +400,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.36.Flow21:
; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.6
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.37.bb27:
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr42_sgpr43
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr42_sgpr43, $sgpr64_sgpr65, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49, $sgpr50_sgpr51
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec :: (load (s8) from %ir.i28, addrspace 1)
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr45, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
@@ -437,33 +440,34 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.38.Flow22:
; GFX90A-NEXT: successors: %bb.36(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr40_sgpr41, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr40_sgpr41, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_ANDN2_B64 killed renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_OR_B64 killed renamable $sgpr36_sgpr37, killed renamable $sgpr56_sgpr57, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_OR_B64 killed renamable $sgpr36_sgpr37, killed renamable $sgpr44_sgpr45, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.36
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.39.bb34:
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr44_sgpr45
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec :: (load (s8) from %ir.i35, addrspace 1)
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
@@ -487,38 +491,37 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.40.Flow23:
; GFX90A-NEXT: successors: %bb.38(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 killed renamable $sgpr56_sgpr57, killed renamable $sgpr60_sgpr61, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.38
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.41.bb41:
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr60_sgpr61, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
; GFX90A-NEXT: renamable $vgpr59, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec :: (load (s8) from %ir.i42, addrspace 1)
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr18, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -539,41 +542,41 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.42.Flow24:
; GFX90A-NEXT: successors: %bb.40(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr18, implicit $exec
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 killed renamable $sgpr56_sgpr57, killed renamable $sgpr60_sgpr61, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.40
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.43.bb55:
; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr44_sgpr45, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr46_sgpr47
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: S_BITCMP1_B32 killed renamable $sgpr33, 16, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr50_sgpr51, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.48, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.44:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr56, $vgpr47, $vgpr18, $vgpr30, $vgpr31, $vgpr58, $vgpr61, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr57, $vgpr63, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr46, $vgpr45, $vgpr2, $vgpr3, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr60, $vgpr62
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr56, $vgpr47, $vgpr18, $vgpr30, $vgpr31, $vgpr58, $vgpr61, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr57, $vgpr63, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr46, $vgpr45, $vgpr2, $vgpr3, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr60, $vgpr62
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -587,36 +590,35 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.45.Flow26:
; GFX90A-NEXT: successors: %bb.47(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.47
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.46.bb48:
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr44_sgpr45, $sgpr52_sgpr53
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr60_sgpr61, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr1, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec :: (load (s8) from %ir.i49, addrspace 1)
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37
- ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
@@ -640,51 +642,51 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.47.Flow25:
; GFX90A-NEXT: successors: %bb.42(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 killed renamable $sgpr46_sgpr47, killed renamable $sgpr56_sgpr57, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr52_sgpr53, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.42
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.48.bb63:
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
- ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr46_sgpr47
+ ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.49:
; GFX90A-NEXT: successors: %bb.44(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1
; GFX90A-NEXT: S_BRANCH %bb.44
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.50.bb68:
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr30, implicit $exec
; GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr48_sgpr49, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr50_sgpr51, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.54, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.51:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -701,20 +703,20 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.52.bb80:
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc
; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc
; GFX90A-NEXT: renamable $vgpr6 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr50_sgpr51 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr52_sgpr53 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.59, implicit killed $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.53:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
@@ -730,13 +732,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.54.bb73:
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr6 = GLOBAL_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec :: (load (s8) from %ir.i74, addrspace 1)
; GFX90A-NEXT: renamable $vgpr4 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr5, dead renamable $sgpr58_sgpr59 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr6, implicit $exec
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
@@ -756,14 +758,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.55.Flow29:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr60_sgpr61, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.56.bb90:
; GFX90A-NEXT: successors: %bb.60(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr53 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 0, implicit $exec
@@ -772,12 +774,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3)
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr22, implicit $exec
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr56, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr57, killed $vgpr10, 1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr44, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr45, killed $vgpr10, 1, implicit $exec
; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec
; GFX90A-NEXT: renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec
; GFX90A-NEXT: renamable $vgpr15 = V_ALIGNBIT_B32_e64 $vgpr15, $vgpr14, 1, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $vgpr14, implicit $exec
; GFX90A-NEXT: S_BRANCH %bb.60
@@ -788,16 +790,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr17, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -821,7 +823,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.58.bb105:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3)
@@ -840,13 +842,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.59.bb85:
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr8 = V_OR_B32_e32 1, $vgpr6, implicit $exec
; GFX90A-NEXT: renamable $vgpr9 = COPY renamable $vgpr7, implicit $exec
; GFX90A-NEXT: renamable $vgpr10 = FLAT_LOAD_UBYTE renamable $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86)
; GFX90A-NEXT: renamable $sgpr17 = S_MOV_B32 0
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr10, implicit $exec
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
@@ -857,31 +859,31 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
- ; GFX90A-NEXT: $sgpr52_sgpr53 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr54_sgpr55 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.56, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.60.Flow31:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr52_sgpr53, implicit-def $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
+ ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.61.Flow30:
; GFX90A-NEXT: successors: %bb.55(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_OR_B64 killed renamable $sgpr50_sgpr51, killed renamable $sgpr56_sgpr57, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr52_sgpr53, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.55
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.62.bb140:
; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -889,14 +891,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.63.Flow13:
; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.64.bb159:
; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -905,21 +907,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.65.Flow10:
; GFX90A-NEXT: successors: %bb.66(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.66.Flow14:
; GFX90A-NEXT: successors: %bb.8(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = COPY $exec
+ ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec
; GFX90A-NEXT: S_BRANCH %bb.8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.67.bb161:
; GFX90A-NEXT: successors: %bb.65(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr25, implicit $exec
@@ -938,7 +940,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.68.bb174:
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec
; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec
@@ -954,14 +956,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.69.Flow:
; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.70.bb186:
; GFX90A-NEXT: successors: %bb.71(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr27, implicit $exec
@@ -990,14 +992,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.71.Flow9:
; GFX90A-NEXT: successors: %bb.63(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0
; GFX90A-NEXT: S_BRANCH %bb.63
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.72.bb196:
; GFX90A-NEXT: successors: %bb.69(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec
; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
index 05c2e0077f4ae..7c0c433ac3c51 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -900,9 +900,8 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
-; CHECK-NEXT: s_waitcnt expcnt(1)
+; CHECK-NEXT: s_waitcnt expcnt(0)
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
; CHECK-NEXT: v_writelane_b32 v0, s33, 2
@@ -910,73 +909,40 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v0, s35, 4
; CHECK-NEXT: v_writelane_b32 v0, s36, 5
; CHECK-NEXT: v_writelane_b32 v0, s37, 6
-; CHECK-NEXT: v_writelane_b32 v0, s38, 7
-; CHECK-NEXT: v_writelane_b32 v0, s39, 8
-; CHECK-NEXT: v_writelane_b32 v0, s40, 9
-; CHECK-NEXT: v_writelane_b32 v0, s41, 10
-; CHECK-NEXT: v_writelane_b32 v0, s42, 11
-; CHECK-NEXT: v_writelane_b32 v0, s43, 12
-; CHECK-NEXT: v_writelane_b32 v0, s44, 13
-; CHECK-NEXT: v_writelane_b32 v0, s45, 14
-; CHECK-NEXT: v_writelane_b32 v0, s46, 15
-; CHECK-NEXT: v_writelane_b32 v0, s47, 16
-; CHECK-NEXT: v_writelane_b32 v0, s48, 17
-; CHECK-NEXT: v_writelane_b32 v0, s49, 18
-; CHECK-NEXT: v_writelane_b32 v0, s50, 19
-; CHECK-NEXT: v_writelane_b32 v0, s51, 20
-; CHECK-NEXT: v_writelane_b32 v0, s52, 21
-; CHECK-NEXT: v_writelane_b32 v0, s53, 22
-; CHECK-NEXT: v_writelane_b32 v0, s54, 23
-; CHECK-NEXT: v_writelane_b32 v0, s55, 24
-; CHECK-NEXT: v_writelane_b32 v0, s56, 25
-; CHECK-NEXT: v_writelane_b32 v0, s57, 26
-; CHECK-NEXT: v_writelane_b32 v0, s58, 27
-; CHECK-NEXT: v_writelane_b32 v0, s59, 28
-; CHECK-NEXT: v_writelane_b32 v0, s60, 29
-; CHECK-NEXT: v_writelane_b32 v0, s61, 30
-; CHECK-NEXT: v_writelane_b32 v0, s62, 31
-; CHECK-NEXT: v_writelane_b32 v0, s63, 32
-; CHECK-NEXT: v_writelane_b32 v0, s64, 33
-; CHECK-NEXT: v_writelane_b32 v0, s65, 34
-; CHECK-NEXT: v_writelane_b32 v0, s66, 35
-; CHECK-NEXT: v_writelane_b32 v0, s67, 36
-; CHECK-NEXT: v_writelane_b32 v0, s68, 37
-; CHECK-NEXT: v_writelane_b32 v0, s69, 38
-; CHECK-NEXT: v_writelane_b32 v0, s70, 39
-; CHECK-NEXT: v_writelane_b32 v0, s71, 40
-; CHECK-NEXT: v_writelane_b32 v0, s72, 41
-; CHECK-NEXT: v_writelane_b32 v0, s73, 42
-; CHECK-NEXT: v_writelane_b32 v0, s74, 43
-; CHECK-NEXT: v_writelane_b32 v0, s75, 44
-; CHECK-NEXT: v_writelane_b32 v0, s76, 45
-; CHECK-NEXT: v_writelane_b32 v0, s77, 46
-; CHECK-NEXT: v_writelane_b32 v0, s78, 47
-; CHECK-NEXT: v_writelane_b32 v0, s79, 48
-; CHECK-NEXT: v_writelane_b32 v0, s80, 49
-; CHECK-NEXT: v_writelane_b32 v0, s81, 50
-; CHECK-NEXT: v_writelane_b32 v0, s82, 51
-; CHECK-NEXT: v_writelane_b32 v0, s83, 52
-; CHECK-NEXT: v_writelane_b32 v0, s84, 53
-; CHECK-NEXT: v_writelane_b32 v0, s85, 54
-; CHECK-NEXT: v_writelane_b32 v0, s86, 55
-; CHECK-NEXT: v_writelane_b32 v0, s87, 56
-; CHECK-NEXT: v_writelane_b32 v0, s88, 57
-; CHECK-NEXT: s_waitcnt expcnt(0)
-; CHECK-NEXT: v_writelane_b32 v1, s95, 0
-; CHECK-NEXT: v_writelane_b32 v0, s89, 58
-; CHECK-NEXT: v_writelane_b32 v1, s96, 1
-; CHECK-NEXT: v_writelane_b32 v0, s90, 59
-; CHECK-NEXT: v_writelane_b32 v1, s97, 2
-; CHECK-NEXT: v_writelane_b32 v0, s91, 60
-; CHECK-NEXT: v_writelane_b32 v1, s98, 3
-; CHECK-NEXT: v_writelane_b32 v0, s92, 61
-; CHECK-NEXT: v_writelane_b32 v1, s99, 4
-; CHECK-NEXT: s_mov_b32 s31, s12
-; CHECK-NEXT: v_writelane_b32 v0, s93, 62
-; CHECK-NEXT: v_writelane_b32 v1, s100, 5
-; CHECK-NEXT: s_cmp_eq_u32 s31, 0
-; CHECK-NEXT: v_writelane_b32 v0, s94, 63
-; CHECK-NEXT: v_writelane_b32 v1, s101, 6
+; CHECK-NEXT: v_writelane_b32 v0, s46, 7
+; CHECK-NEXT: v_writelane_b32 v0, s47, 8
+; CHECK-NEXT: v_writelane_b32 v0, s48, 9
+; CHECK-NEXT: v_writelane_b32 v0, s49, 10
+; CHECK-NEXT: v_writelane_b32 v0, s50, 11
+; CHECK-NEXT: v_writelane_b32 v0, s51, 12
+; CHECK-NEXT: v_writelane_b32 v0, s52, 13
+; CHECK-NEXT: v_writelane_b32 v0, s53, 14
+; CHECK-NEXT: v_writelane_b32 v0, s62, 15
+; CHECK-NEXT: v_writelane_b32 v0, s63, 16
+; CHECK-NEXT: v_writelane_b32 v0, s64, 17
+; CHECK-NEXT: v_writelane_b32 v0, s65, 18
+; CHECK-NEXT: v_writelane_b32 v0, s66, 19
+; CHECK-NEXT: v_writelane_b32 v0, s67, 20
+; CHECK-NEXT: v_writelane_b32 v0, s68, 21
+; CHECK-NEXT: v_writelane_b32 v0, s69, 22
+; CHECK-NEXT: v_writelane_b32 v0, s78, 23
+; CHECK-NEXT: v_writelane_b32 v0, s79, 24
+; CHECK-NEXT: v_writelane_b32 v0, s80, 25
+; CHECK-NEXT: v_writelane_b32 v0, s81, 26
+; CHECK-NEXT: v_writelane_b32 v0, s82, 27
+; CHECK-NEXT: v_writelane_b32 v0, s83, 28
+; CHECK-NEXT: v_writelane_b32 v0, s84, 29
+; CHECK-NEXT: v_writelane_b32 v0, s85, 30
+; CHECK-NEXT: v_writelane_b32 v0, s94, 31
+; CHECK-NEXT: v_writelane_b32 v0, s95, 32
+; CHECK-NEXT: v_writelane_b32 v0, s96, 33
+; CHECK-NEXT: v_writelane_b32 v0, s97, 34
+; CHECK-NEXT: v_writelane_b32 v0, s98, 35
+; CHECK-NEXT: v_writelane_b32 v0, s99, 36
+; CHECK-NEXT: s_mov_b32 s38, s12
+; CHECK-NEXT: v_writelane_b32 v0, s100, 37
+; CHECK-NEXT: s_cmp_eq_u32 s38, 0
+; CHECK-NEXT: v_writelane_b32 v0, s101, 38
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: ;;#ASMEND
@@ -1292,9 +1258,9 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: s_cbranch_scc0 .LBB1_1
; CHECK-NEXT: ; %bb.3: ; %entry
; CHECK-NEXT: s_not_b64 exec, exec
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
-; CHECK-NEXT: v_writelane_b32 v2, s0, 0
-; CHECK-NEXT: v_writelane_b32 v2, s1, 1
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
+; CHECK-NEXT: v_writelane_b32 v1, s0, 0
+; CHECK-NEXT: v_writelane_b32 v1, s1, 1
; CHECK-NEXT: s_getpc_b64 s[0:1]
; CHECK-NEXT: .Lpost_getpc1:
; CHECK-NEXT: s_add_u32 s0, s0, (.LBB1_4-.Lpost_getpc1)&4294967295
@@ -1313,9 +1279,9 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_branch .LBB1_2
; CHECK-NEXT: .LBB1_4: ; %bb3
-; CHECK-NEXT: v_readlane_b32 s0, v2, 0
-; CHECK-NEXT: v_readlane_b32 s1, v2, 1
-; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8
+; CHECK-NEXT: v_readlane_b32 s0, v1, 0
+; CHECK-NEXT: v_readlane_b32 s1, v1, 1
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
; CHECK-NEXT: s_not_b64 exec, exec
; CHECK-NEXT: .LBB1_2: ; %bb3
; CHECK-NEXT: ;;#ASMSTART
@@ -1630,70 +1596,38 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; reg use vcc_hi
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s101, v1, 6
-; CHECK-NEXT: v_readlane_b32 s100, v1, 5
-; CHECK-NEXT: v_readlane_b32 s99, v1, 4
-; CHECK-NEXT: v_readlane_b32 s98, v1, 3
-; CHECK-NEXT: v_readlane_b32 s97, v1, 2
-; CHECK-NEXT: v_readlane_b32 s96, v1, 1
-; CHECK-NEXT: v_readlane_b32 s95, v1, 0
-; CHECK-NEXT: v_readlane_b32 s94, v0, 63
-; CHECK-NEXT: v_readlane_b32 s93, v0, 62
-; CHECK-NEXT: v_readlane_b32 s92, v0, 61
-; CHECK-NEXT: v_readlane_b32 s91, v0, 60
-; CHECK-NEXT: v_readlane_b32 s90, v0, 59
-; CHECK-NEXT: v_readlane_b32 s89, v0, 58
-; CHECK-NEXT: v_readlane_b32 s88, v0, 57
-; CHECK-NEXT: v_readlane_b32 s87, v0, 56
-; CHECK-NEXT: v_readlane_b32 s86, v0, 55
-; CHECK-NEXT: v_readlane_b32 s85, v0, 54
-; CHECK-NEXT: v_readlane_b32 s84, v0, 53
-; CHECK-NEXT: v_readlane_b32 s83, v0, 52
-; CHECK-NEXT: v_readlane_b32 s82, v0, 51
-; CHECK-NEXT: v_readlane_b32 s81, v0, 50
-; CHECK-NEXT: v_readlane_b32 s80, v0, 49
-; CHECK-NEXT: v_readlane_b32 s79, v0, 48
-; CHECK-NEXT: v_readlane_b32 s78, v0, 47
-; CHECK-NEXT: v_readlane_b32 s77, v0, 46
-; CHECK-NEXT: v_readlane_b32 s76, v0, 45
-; CHECK-NEXT: v_readlane_b32 s75, v0, 44
-; CHECK-NEXT: v_readlane_b32 s74, v0, 43
-; CHECK-NEXT: v_readlane_b32 s73, v0, 42
-; CHECK-NEXT: v_readlane_b32 s72, v0, 41
-; CHECK-NEXT: v_readlane_b32 s71, v0, 40
-; CHECK-NEXT: v_readlane_b32 s70, v0, 39
-; CHECK-NEXT: v_readlane_b32 s69, v0, 38
-; CHECK-NEXT: v_readlane_b32 s68, v0, 37
-; CHECK-NEXT: v_readlane_b32 s67, v0, 36
-; CHECK-NEXT: v_readlane_b32 s66, v0, 35
-; CHECK-NEXT: v_readlane_b32 s65, v0, 34
-; CHECK-NEXT: v_readlane_b32 s64, v0, 33
-; CHECK-NEXT: v_readlane_b32 s63, v0, 32
-; CHECK-NEXT: v_readlane_b32 s62, v0, 31
-; CHECK-NEXT: v_readlane_b32 s61, v0, 30
-; CHECK-NEXT: v_readlane_b32 s60, v0, 29
-; CHECK-NEXT: v_readlane_b32 s59, v0, 28
-; CHECK-NEXT: v_readlane_b32 s58, v0, 27
-; CHECK-NEXT: v_readlane_b32 s57, v0, 26
-; CHECK-NEXT: v_readlane_b32 s56, v0, 25
-; CHECK-NEXT: v_readlane_b32 s55, v0, 24
-; CHECK-NEXT: v_readlane_b32 s54, v0, 23
-; CHECK-NEXT: v_readlane_b32 s53, v0, 22
-; CHECK-NEXT: v_readlane_b32 s52, v0, 21
-; CHECK-NEXT: v_readlane_b32 s51, v0, 20
-; CHECK-NEXT: v_readlane_b32 s50, v0, 19
-; CHECK-NEXT: v_readlane_b32 s49, v0, 18
-; CHECK-NEXT: v_readlane_b32 s48, v0, 17
-; CHECK-NEXT: v_readlane_b32 s47, v0, 16
-; CHECK-NEXT: v_readlane_b32 s46, v0, 15
-; CHECK-NEXT: v_readlane_b32 s45, v0, 14
-; CHECK-NEXT: v_readlane_b32 s44, v0, 13
-; CHECK-NEXT: v_readlane_b32 s43, v0, 12
-; CHECK-NEXT: v_readlane_b32 s42, v0, 11
-; CHECK-NEXT: v_readlane_b32 s41, v0, 10
-; CHECK-NEXT: v_readlane_b32 s40, v0, 9
-; CHECK-NEXT: v_readlane_b32 s39, v0, 8
-; CHECK-NEXT: v_readlane_b32 s38, v0, 7
+; CHECK-NEXT: v_readlane_b32 s101, v0, 38
+; CHECK-NEXT: v_readlane_b32 s100, v0, 37
+; CHECK-NEXT: v_readlane_b32 s99, v0, 36
+; CHECK-NEXT: v_readlane_b32 s98, v0, 35
+; CHECK-NEXT: v_readlane_b32 s97, v0, 34
+; CHECK-NEXT: v_readlane_b32 s96, v0, 33
+; CHECK-NEXT: v_readlane_b32 s95, v0, 32
+; CHECK-NEXT: v_readlane_b32 s94, v0, 31
+; CHECK-NEXT: v_readlane_b32 s85, v0, 30
+; CHECK-NEXT: v_readlane_b32 s84, v0, 29
+; CHECK-NEXT: v_readlane_b32 s83, v0, 28
+; CHECK-NEXT: v_readlane_b32 s82, v0, 27
+; CHECK-NEXT: v_readlane_b32 s81, v0, 26
+; CHECK-NEXT: v_readlane_b32 s80, v0, 25
+; CHECK-NEXT: v_readlane_b32 s79, v0, 24
+; CHECK-NEXT: v_readlane_b32 s78, v0, 23
+; CHECK-NEXT: v_readlane_b32 s69, v0, 22
+; CHECK-NEXT: v_readlane_b32 s68, v0, 21
+; CHECK-NEXT: v_readlane_b32 s67, v0, 20
+; CHECK-NEXT: v_readlane_b32 s66, v0, 19
+; CHECK-NEXT: v_readlane_b32 s65, v0, 18
+; CHECK-NEXT: v_readlane_b32 s64, v0, 17
+; CHECK-NEXT: v_readlane_b32 s63, v0, 16
+; CHECK-NEXT: v_readlane_b32 s62, v0, 15
+; CHECK-NEXT: v_readlane_b32 s53, v0, 14
+; CHECK-NEXT: v_readlane_b32 s52, v0, 13
+; CHECK-NEXT: v_readlane_b32 s51, v0, 12
+; CHECK-NEXT: v_readlane_b32 s50, v0, 11
+; CHECK-NEXT: v_readlane_b32 s49, v0, 10
+; CHECK-NEXT: v_readlane_b32 s48, v0, 9
+; CHECK-NEXT: v_readlane_b32 s47, v0, 8
+; CHECK-NEXT: v_readlane_b32 s46, v0, 7
; CHECK-NEXT: v_readlane_b32 s37, v0, 6
; CHECK-NEXT: v_readlane_b32 s36, v0, 5
; CHECK-NEXT: v_readlane_b32 s35, v0, 4
@@ -1703,7 +1637,6 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s30, v0, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll
index d4c50cf2c7e4a..34f4476f7fd6a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll
@@ -1,6 +1,6 @@
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -enable-var-scope %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -enable-var-scope %s
-; CHECK: LLVM ERROR: failed to find free scratch register
+; CHECK: illegal VGPR to SGPR copy
declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0
declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index 0b8ad359ccb94..394c32c8e4bcf 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -1385,15 +1385,15 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s29, s33
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 vcc, -1
+; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, vcc
+; GFX9-NEXT: s_mov_b64 exec, s[38:39]
; GFX9-NEXT: v_writelane_b32 v40, s29, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 vcc
-; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[38:39]
+; GFX9-NEXT: s_add_u32 s38, s38, external_void_func_a15i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s39, s39, external_void_func_a15i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
@@ -1408,7 +1408,7 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; GFX9-NEXT: s_mov_b32 s23, s27
; GFX9-NEXT: s_mov_b32 s24, s28
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], vcc
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[38:39]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 16fe85bf138b2..48f32a87203a3 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -63,18 +63,18 @@ declare hidden void @external_void_func_v16i8(<16 x i8>) #0
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; VI-LABEL: test_call_external_void_func_i1_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -82,18 +82,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_i1_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -101,18 +101,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_i1_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -155,18 +155,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: v_bfe_i32 v0, v0, 0, 1
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -178,18 +178,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: v_bfe_i32 v0, v0, 0, 1
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -201,18 +201,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -265,18 +265,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: v_and_b32_e32 v0, 1, v0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -288,18 +288,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: v_and_b32_e32 v0, 1, v0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -311,18 +311,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -370,18 +370,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; VI-LABEL: test_call_external_void_func_i8_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x7b
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -389,18 +389,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_i8_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 0x7b
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -408,18 +408,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_i8_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -463,18 +463,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -485,18 +485,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -507,18 +507,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -567,18 +567,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -589,18 +589,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -611,18 +611,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -667,18 +667,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; VI-LABEL: test_call_external_void_func_i16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x7b
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -686,18 +686,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_i16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 0x7b
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -705,18 +705,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_i16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -759,18 +759,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -781,18 +781,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -803,18 +803,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -863,18 +863,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -885,18 +885,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -907,18 +907,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -963,18 +963,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; VI-LABEL: test_call_external_void_func_i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 42
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -982,18 +982,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 42
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1001,18 +1001,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1051,18 +1051,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; VI-LABEL: test_call_external_void_func_i64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x7b
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_mov_b32 s32, 0
@@ -1071,18 +1071,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_i64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 0x7b
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 s32, 0
@@ -1091,18 +1091,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_i64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -1143,69 +1143,69 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; VI-LABEL: test_call_external_void_func_v2i64:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_mov_b32 s0, 0
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2i64:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_mov_b32 s0, 0
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v2i64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -1252,18 +1252,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2i64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -1274,18 +1274,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2i64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -1296,18 +1296,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -1353,23 +1353,23 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; VI-LABEL: test_call_external_void_func_v3i64:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_mov_b32 s0, 0
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v4, 1
; VI-NEXT: v_mov_b32_e32 v5, 2
; VI-NEXT: s_mov_b32 s32, 0
@@ -1378,23 +1378,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
;
; CI-LABEL: test_call_external_void_func_v3i64:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_mov_b32 s0, 0
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v4, 1
; CI-NEXT: v_mov_b32_e32 v5, 2
; CI-NEXT: s_mov_b32 s32, 0
@@ -1403,23 +1403,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v4, 1
; GFX9-NEXT: v_mov_b32_e32 v5, 2
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -1473,23 +1473,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; VI-LABEL: test_call_external_void_func_v4i64:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_mov_b32 s0, 0
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v4, 1
; VI-NEXT: v_mov_b32_e32 v5, 2
; VI-NEXT: v_mov_b32_e32 v6, 3
@@ -1500,23 +1500,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
;
; CI-LABEL: test_call_external_void_func_v4i64:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_mov_b32 s0, 0
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v4, 1
; CI-NEXT: v_mov_b32_e32 v5, 2
; CI-NEXT: v_mov_b32_e32 v6, 3
@@ -1527,23 +1527,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v4, 1
; GFX9-NEXT: v_mov_b32_e32 v5, 2
; GFX9-NEXT: v_mov_b32_e32 v6, 3
@@ -1601,18 +1601,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; VI-LABEL: test_call_external_void_func_f16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x4400
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1620,18 +1620,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_f16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 4.0
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1639,18 +1639,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_f16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1689,18 +1689,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 4.0
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1708,18 +1708,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 4.0
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1727,18 +1727,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1777,18 +1777,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1.0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: s_mov_b32 s32, 0
@@ -1797,18 +1797,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: s_mov_b32 s32, 0
@@ -1817,18 +1817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -1869,18 +1869,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1.0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1890,18 +1890,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1911,18 +1911,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1966,18 +1966,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v5f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1.0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1989,18 +1989,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v5f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -2012,18 +2012,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
@@ -2072,18 +2072,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; VI-LABEL: test_call_external_void_func_f64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0x40100000
; VI-NEXT: s_mov_b32 s32, 0
@@ -2092,18 +2092,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_f64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 0x40100000
; CI-NEXT: s_mov_b32 s32, 0
@@ -2112,18 +2112,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_f64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -2164,18 +2164,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2f64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 0
@@ -2186,18 +2186,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2f64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 0
@@ -2208,18 +2208,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
@@ -2265,18 +2265,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3f64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 0
@@ -2289,18 +2289,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3f64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 0
@@ -2313,18 +2313,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
@@ -2375,42 +2375,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; VI-LABEL: test_call_external_void_func_v2i16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2i16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
@@ -2419,21 +2419,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2476,42 +2476,42 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; VI-LABEL: test_call_external_void_func_v3i16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v3i16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16
@@ -2522,21 +2522,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2579,42 +2579,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; VI-LABEL: test_call_external_void_func_v3f16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v3f16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
@@ -2626,21 +2626,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2683,18 +2683,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3i16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x20001
; VI-NEXT: v_mov_b32_e32 v1, 3
; VI-NEXT: s_mov_b32 s32, 0
@@ -2703,18 +2703,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3i16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -2724,18 +2724,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT: v_mov_b32_e32 v1, 3
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -2776,18 +2776,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3f16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00
; VI-NEXT: v_mov_b32_e32 v1, 0x4400
; VI-NEXT: s_mov_b32 s32, 0
@@ -2796,18 +2796,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3f16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -2817,18 +2817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -2870,42 +2870,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; VI-LABEL: test_call_external_void_func_v4i16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v4i16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
@@ -2917,21 +2917,21 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2974,18 +2974,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; VI-LABEL: test_call_external_void_func_v4i16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 0x20001
; VI-NEXT: v_mov_b32_e32 v1, 0x40003
; VI-NEXT: s_mov_b32 s32, 0
@@ -2994,18 +2994,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v4i16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -3016,18 +3016,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -3069,42 +3069,42 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; VI-LABEL: test_call_external_void_func_v2f16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2f16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
@@ -3115,21 +3115,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2f16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3172,63 +3172,63 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; VI-LABEL: test_call_external_void_func_v2i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v2i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3271,18 +3271,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: s_mov_b32 s32, 0
@@ -3291,18 +3291,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: s_mov_b32 s32, 0
@@ -3311,18 +3311,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -3363,18 +3363,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; VI-LABEL: test_call_external_void_func_v3i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 3
; VI-NEXT: v_mov_b32_e32 v1, 4
; VI-NEXT: v_mov_b32_e32 v2, 5
@@ -3384,18 +3384,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_v3i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 3
; CI-NEXT: v_mov_b32_e32 v1, 4
; CI-NEXT: v_mov_b32_e32 v2, 5
@@ -3405,18 +3405,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 3
; GFX9-NEXT: v_mov_b32_e32 v1, 4
; GFX9-NEXT: v_mov_b32_e32 v2, 5
@@ -3460,18 +3460,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; VI-LABEL: test_call_external_void_func_v3i32_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 3
; VI-NEXT: v_mov_b32_e32 v1, 4
; VI-NEXT: v_mov_b32_e32 v2, 5
@@ -3482,18 +3482,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_v3i32_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 3
; CI-NEXT: v_mov_b32_e32 v1, 4
; CI-NEXT: v_mov_b32_e32 v2, 5
@@ -3504,18 +3504,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 3
; GFX9-NEXT: v_mov_b32_e32 v1, 4
; GFX9-NEXT: v_mov_b32_e32 v2, 5
@@ -3561,63 +3561,63 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; VI-LABEL: test_call_external_void_func_v4i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v4i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v4i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3660,18 +3660,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v4i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -3682,18 +3682,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v4i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -3704,18 +3704,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -3761,18 +3761,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v5i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -3784,18 +3784,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v5i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -3807,18 +3807,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -3867,72 +3867,72 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; VI-LABEL: test_call_external_void_func_v8i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v8i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v8i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3983,18 +3983,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v8i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -4009,18 +4009,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v8i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -4035,18 +4035,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -4102,13 +4102,13 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; VI-LABEL: test_call_external_void_func_v16i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
@@ -4116,25 +4116,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v16i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
@@ -4142,25 +4142,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v16i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -4168,12 +4168,12 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -4231,8 +4231,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
@@ -4242,19 +4242,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_getpc_b64 s[8:9]
; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_waitcnt vmcnt(7)
-; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32
; VI-NEXT: s_swappc_b64 s[30:31], s[8:9]
; VI-NEXT: s_endpgm
;
@@ -4263,8 +4263,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
@@ -4274,19 +4274,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_getpc_b64 s[8:9]
; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_waitcnt vmcnt(7)
-; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32
; CI-NEXT: s_swappc_b64 s[30:31], s[8:9]
; CI-NEXT: s_endpgm
;
@@ -4295,8 +4295,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
@@ -4306,19 +4306,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_getpc_b64 s[8:9]
; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_waitcnt vmcnt(7)
-; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GFX9-NEXT: s_endpgm
;
@@ -4384,15 +4384,15 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; VI-LABEL: test_call_external_void_func_v32i32_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0
; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
@@ -4404,30 +4404,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_waitcnt vmcnt(8)
-; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
+; VI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(8)
-; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v32i32_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0
; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
@@ -4439,30 +4439,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_waitcnt vmcnt(8)
-; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
+; CI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(8)
-; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0
; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
@@ -4474,16 +4474,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
+; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
@@ -4557,89 +4557,89 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
; VI-LABEL: test_call_external_i32_func_i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s42, -1
-; VI-NEXT: s_mov_b32 s43, 0xe80000
-; VI-NEXT: s_add_u32 s40, s40, s5
-; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
-; VI-NEXT: s_addc_u32 s41, s41, 0
+; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s66, -1
+; VI-NEXT: s_mov_b32 s67, 0xe80000
+; VI-NEXT: s_add_u32 s64, s64, s5
+; VI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24
+; VI-NEXT: s_addc_u32 s65, s65, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[40:41]
+; VI-NEXT: s_mov_b64 s[0:1], s[64:65]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[42:43]
+; VI-NEXT: s_mov_b64 s[2:3], s[66:67]
; VI-NEXT: v_mov_b32_e32 v0, 42
; VI-NEXT: s_mov_b32 s32, 0
-; VI-NEXT: s_mov_b32 s39, 0xf000
-; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s51, 0xf000
+; VI-NEXT: s_mov_b32 s50, -1
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_i32_func_i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s42, -1
-; CI-NEXT: s_mov_b32 s43, 0xe8f000
-; CI-NEXT: s_add_u32 s40, s40, s5
-; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9
-; CI-NEXT: s_addc_u32 s41, s41, 0
+; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s66, -1
+; CI-NEXT: s_mov_b32 s67, 0xe8f000
+; CI-NEXT: s_add_u32 s64, s64, s5
+; CI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x9
+; CI-NEXT: s_addc_u32 s65, s65, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[40:41]
+; CI-NEXT: s_mov_b64 s[0:1], s[64:65]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[42:43]
+; CI-NEXT: s_mov_b64 s[2:3], s[66:67]
; CI-NEXT: v_mov_b32_e32 v0, 42
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: s_mov_b32 s39, 0xf000
-; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s51, 0xf000
+; CI-NEXT: s_mov_b32 s50, -1
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_i32_func_i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s42, -1
-; GFX9-NEXT: s_mov_b32 s43, 0xe00000
-; GFX9-NEXT: s_add_u32 s40, s40, s5
-; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
-; GFX9-NEXT: s_addc_u32 s41, s41, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s5
+; GFX9-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_mov_b32 s39, 0xf000
-; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xf000
+; GFX9-NEXT: s_mov_b32 s50, -1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_i32_func_i32_imm:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24
+; GFX11-NEXT: s_load_b64 s[48:49], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 42
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-NEXT: s_mov_b32 s32, 0
-; GFX11-NEXT: s_mov_b32 s39, 0x31016000
-; GFX11-NEXT: s_mov_b32 s38, -1
+; GFX11-NEXT: s_mov_b32 s51, 0x31016000
+; GFX11-NEXT: s_mov_b32 s50, -1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc
+; GFX11-NEXT: buffer_store_b32 v0, off, s[48:51], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
@@ -4647,7 +4647,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; HSA: ; %bb.0:
; HSA-NEXT: s_add_i32 s8, s8, s11
; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
-; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0
+; HSA-NEXT: s_load_dwordx2 s[48:49], s[6:7], 0x0
; HSA-NEXT: s_add_u32 s0, s0, s11
; HSA-NEXT: s_addc_u32 s1, s1, 0
; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
@@ -4657,10 +4657,10 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
; HSA-NEXT: v_mov_b32_e32 v0, 42
; HSA-NEXT: s_mov_b32 s32, 0
-; HSA-NEXT: s_mov_b32 s39, 0x1100f000
-; HSA-NEXT: s_mov_b32 s38, -1
+; HSA-NEXT: s_mov_b32 s51, 0x1100f000
+; HSA-NEXT: s_mov_b32 s50, -1
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; HSA-NEXT: buffer_store_dword v0, off, s[48:51], 0
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_endpgm
%val = call i32 @external_i32_func_i32(i32 42)
@@ -4671,72 +4671,72 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; VI-LABEL: test_call_external_void_func_struct_i8_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_struct_i8_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -4787,86 +4787,86 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: v_mov_b32_e32 v0, 3
-; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0
; VI-NEXT: v_mov_b32_e32 v0, 8
-; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
-; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
+; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
+; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_movk_i32 s32, 0x400
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: v_mov_b32_e32 v0, 3
-; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0
; CI-NEXT: v_mov_b32_e32 v0, 8
-; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
-; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
+; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
+; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_movk_i32 s32, 0x400
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0
; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_movk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
@@ -4923,33 +4923,33 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s5
-; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: v_mov_b32_e32 v0, 3
-; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0
; VI-NEXT: v_mov_b32_e32 v0, 8
-; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
-; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
+; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
+; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0
; VI-NEXT: s_movk_i32 s32, 0x800
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32
; VI-NEXT: v_mov_b32_e32 v0, 8
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
-; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
+; VI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8
+; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt vmcnt(1)
@@ -4961,33 +4961,33 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
;
; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s5
-; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: v_mov_b32_e32 v0, 3
-; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0
; CI-NEXT: v_mov_b32_e32 v0, 8
-; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
-; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
+; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
+; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0
; CI-NEXT: s_movk_i32 s32, 0x800
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32
; CI-NEXT: v_mov_b32_e32 v0, 8
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
-; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
+; CI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8
+; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt vmcnt(1)
@@ -4999,34 +4999,34 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
;
; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s5
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
+; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0
; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
-; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
+; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
-; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
+; GFX9-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8
+; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt vmcnt(1)
@@ -5121,23 +5121,23 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; VI-LABEL: test_call_external_void_func_v16i8:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s38, -1
-; VI-NEXT: s_mov_b32 s39, 0xe80000
-; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s37, s37, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
@@ -5163,23 +5163,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
;
; CI-LABEL: test_call_external_void_func_v16i8:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s38, -1
-; CI-NEXT: s_mov_b32 s39, 0xe8f000
-; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s37, s37, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
+; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
@@ -5205,23 +5205,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v16i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
@@ -5324,29 +5324,29 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
; VI-LABEL: stack_passed_arg_alignment_v32i32_f64:
; VI: ; %bb.0: ; %entry
-; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s54, -1
-; VI-NEXT: s_mov_b32 s55, 0xe80000
-; VI-NEXT: s_add_u32 s52, s52, s5
+; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s66, -1
+; VI-NEXT: s_mov_b32 s67, 0xe80000
+; VI-NEXT: s_add_u32 s64, s64, s5
; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
; VI-NEXT: s_mov_b32 s32, 0
-; VI-NEXT: s_addc_u32 s53, s53, 0
+; VI-NEXT: s_addc_u32 s65, s65, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s23
-; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32
; VI-NEXT: v_mov_b32_e32 v0, s4
-; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
+; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4
; VI-NEXT: v_mov_b32_e32 v0, s5
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[52:53]
-; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
+; VI-NEXT: s_mov_b64 s[0:1], s[64:65]
+; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[54:55]
+; VI-NEXT: s_mov_b64 s[2:3], s[66:67]
; VI-NEXT: v_mov_b32_e32 v0, s36
; VI-NEXT: v_mov_b32_e32 v1, s37
; VI-NEXT: v_mov_b32_e32 v2, s38
@@ -5383,29 +5383,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
;
; CI-LABEL: stack_passed_arg_alignment_v32i32_f64:
; CI: ; %bb.0: ; %entry
-; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s54, -1
-; CI-NEXT: s_mov_b32 s55, 0xe8f000
-; CI-NEXT: s_add_u32 s52, s52, s5
+; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s66, -1
+; CI-NEXT: s_mov_b32 s67, 0xe8f000
+; CI-NEXT: s_add_u32 s64, s64, s5
; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19
; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29
; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: s_addc_u32 s53, s53, 0
+; CI-NEXT: s_addc_u32 s65, s65, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s23
-; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32
; CI-NEXT: v_mov_b32_e32 v0, s4
-; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4
; CI-NEXT: v_mov_b32_e32 v0, s5
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[52:53]
-; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
+; CI-NEXT: s_mov_b64 s[0:1], s[64:65]
+; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[54:55]
+; CI-NEXT: s_mov_b64 s[2:3], s[66:67]
; CI-NEXT: v_mov_b32_e32 v0, s36
; CI-NEXT: v_mov_b32_e32 v1, s37
; CI-NEXT: v_mov_b32_e32 v2, s38
@@ -5442,29 +5442,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
;
; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s54, -1
-; GFX9-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-NEXT: s_add_u32 s52, s52, s5
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s5
; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s23
-; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32
; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4
; GFX9-NEXT: v_mov_b32_e32 v0, s5
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, s36
; GFX9-NEXT: v_mov_b32_e32 v1, s37
; GFX9-NEXT: v_mov_b32_e32 v2, s38
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index db9ce56ecc3cc..67a70cdeb1ecc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -528,15 +528,16 @@ define void @callee_saved_sgpr_func() #2 {
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
-; MUBUF-NEXT: v_writelane_b32 v40, s40, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s34, 2
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; def s40
; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: s_mov_b32 s34, s40
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: ;;#ASMSTART
-; MUBUF-NEXT: ; use s40
+; MUBUF-NEXT: ; use s34
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s40, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s34, v40, 2
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
@@ -563,15 +564,16 @@ define void @callee_saved_sgpr_func() #2 {
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
-; FLATSCR-NEXT: v_writelane_b32 v40, s40, 2
+; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_mov_b32 s34, s40
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s40
+; FLATSCR-NEXT: ; use s34
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s40, v40, 2
+; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
@@ -600,9 +602,10 @@ define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s40
+; FLATSCR-NEXT: ; use s33
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
@@ -629,22 +632,23 @@ define void @callee_saved_sgpr_vgpr_func() #2 {
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v41, s40, 2
+; MUBUF-NEXT: v_writelane_b32 v41, s34, 2
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; def s40
; MUBUF-NEXT: ;;#ASMEND
+; MUBUF-NEXT: s_mov_b32 s34, s40
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; def v40
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: ;;#ASMSTART
-; MUBUF-NEXT: ; use s40
+; MUBUF-NEXT: ; use s34
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; use v40
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; MUBUF-NEXT: v_readlane_b32 s40, v41, 2
+; MUBUF-NEXT: v_readlane_b32 s34, v41, 2
; MUBUF-NEXT: v_readlane_b32 s31, v41, 1
; MUBUF-NEXT: v_readlane_b32 s30, v41, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
@@ -672,22 +676,23 @@ define void @callee_saved_sgpr_vgpr_func() #2 {
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: v_writelane_b32 v41, s40, 2
+; FLATSCR-NEXT: v_writelane_b32 v41, s34, 2
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_mov_b32 s34, s40
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s40
+; FLATSCR-NEXT: ; use s34
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
-; FLATSCR-NEXT: v_readlane_b32 s40, v41, 2
+; FLATSCR-NEXT: v_readlane_b32 s34, v41, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v41, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v41, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
@@ -718,13 +723,14 @@ define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
+; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def v32
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_mov_b32_e32 v40, v32
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s40
+; FLATSCR-NEXT: ; use s33
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use v40
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 4c6f2d22080e0..002e82f676e8b 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -255,52 +255,28 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: v_writelane_b32 v40, s36, 0
; MUBUF-NEXT: v_writelane_b32 v40, s37, 1
-; MUBUF-NEXT: v_writelane_b32 v40, s38, 2
-; MUBUF-NEXT: v_writelane_b32 v40, s39, 3
-; MUBUF-NEXT: v_writelane_b32 v40, s40, 4
-; MUBUF-NEXT: v_writelane_b32 v40, s41, 5
-; MUBUF-NEXT: v_writelane_b32 v40, s42, 6
-; MUBUF-NEXT: v_writelane_b32 v40, s43, 7
-; MUBUF-NEXT: v_writelane_b32 v40, s44, 8
-; MUBUF-NEXT: v_writelane_b32 v40, s45, 9
-; MUBUF-NEXT: v_writelane_b32 v40, s46, 10
-; MUBUF-NEXT: v_writelane_b32 v40, s47, 11
-; MUBUF-NEXT: v_writelane_b32 v40, s48, 12
-; MUBUF-NEXT: v_writelane_b32 v40, s49, 13
-; MUBUF-NEXT: v_writelane_b32 v40, s50, 14
-; MUBUF-NEXT: v_writelane_b32 v40, s51, 15
-; MUBUF-NEXT: v_writelane_b32 v40, s52, 16
-; MUBUF-NEXT: v_writelane_b32 v40, s53, 17
-; MUBUF-NEXT: v_writelane_b32 v40, s54, 18
-; MUBUF-NEXT: v_writelane_b32 v40, s55, 19
-; MUBUF-NEXT: v_writelane_b32 v40, s56, 20
-; MUBUF-NEXT: v_writelane_b32 v40, s57, 21
-; MUBUF-NEXT: v_writelane_b32 v40, s58, 22
-; MUBUF-NEXT: v_writelane_b32 v40, s59, 23
-; MUBUF-NEXT: v_writelane_b32 v40, s60, 24
-; MUBUF-NEXT: v_writelane_b32 v40, s61, 25
-; MUBUF-NEXT: v_writelane_b32 v40, s62, 26
-; MUBUF-NEXT: v_writelane_b32 v40, s63, 27
-; MUBUF-NEXT: v_writelane_b32 v40, s64, 28
-; MUBUF-NEXT: v_writelane_b32 v40, s65, 29
-; MUBUF-NEXT: v_writelane_b32 v40, s66, 30
-; MUBUF-NEXT: v_writelane_b32 v40, s67, 31
-; MUBUF-NEXT: v_writelane_b32 v40, s68, 32
-; MUBUF-NEXT: v_writelane_b32 v40, s69, 33
-; MUBUF-NEXT: v_writelane_b32 v40, s70, 34
-; MUBUF-NEXT: v_writelane_b32 v40, s71, 35
-; MUBUF-NEXT: v_writelane_b32 v40, s72, 36
-; MUBUF-NEXT: v_writelane_b32 v40, s73, 37
-; MUBUF-NEXT: v_writelane_b32 v40, s74, 38
-; MUBUF-NEXT: v_writelane_b32 v40, s75, 39
-; MUBUF-NEXT: v_writelane_b32 v40, s76, 40
-; MUBUF-NEXT: v_writelane_b32 v40, s77, 41
-; MUBUF-NEXT: v_writelane_b32 v40, s78, 42
-; MUBUF-NEXT: v_writelane_b32 v40, s79, 43
-; MUBUF-NEXT: v_writelane_b32 v40, s80, 44
-; MUBUF-NEXT: v_writelane_b32 v40, s81, 45
-; MUBUF-NEXT: v_writelane_b32 v40, s82, 46
-; MUBUF-NEXT: v_writelane_b32 v40, s83, 47
+; MUBUF-NEXT: v_writelane_b32 v40, s46, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s47, 3
+; MUBUF-NEXT: v_writelane_b32 v40, s48, 4
+; MUBUF-NEXT: v_writelane_b32 v40, s49, 5
+; MUBUF-NEXT: v_writelane_b32 v40, s50, 6
+; MUBUF-NEXT: v_writelane_b32 v40, s51, 7
+; MUBUF-NEXT: v_writelane_b32 v40, s52, 8
+; MUBUF-NEXT: v_writelane_b32 v40, s53, 9
+; MUBUF-NEXT: v_writelane_b32 v40, s62, 10
+; MUBUF-NEXT: v_writelane_b32 v40, s63, 11
+; MUBUF-NEXT: v_writelane_b32 v40, s64, 12
+; MUBUF-NEXT: v_writelane_b32 v40, s65, 13
+; MUBUF-NEXT: v_writelane_b32 v40, s66, 14
+; MUBUF-NEXT: v_writelane_b32 v40, s67, 15
+; MUBUF-NEXT: v_writelane_b32 v40, s68, 16
+; MUBUF-NEXT: v_writelane_b32 v40, s69, 17
+; MUBUF-NEXT: v_writelane_b32 v40, s78, 18
+; MUBUF-NEXT: v_writelane_b32 v40, s79, 19
+; MUBUF-NEXT: v_writelane_b32 v40, s80, 20
+; MUBUF-NEXT: v_writelane_b32 v40, s81, 21
+; MUBUF-NEXT: v_writelane_b32 v40, s82, 22
+; MUBUF-NEXT: v_writelane_b32 v40, s83, 23
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
@@ -347,52 +323,28 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; use s[4:19]
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s83, v40, 47
-; MUBUF-NEXT: v_readlane_b32 s82, v40, 46
-; MUBUF-NEXT: v_readlane_b32 s81, v40, 45
-; MUBUF-NEXT: v_readlane_b32 s80, v40, 44
-; MUBUF-NEXT: v_readlane_b32 s79, v40, 43
-; MUBUF-NEXT: v_readlane_b32 s78, v40, 42
-; MUBUF-NEXT: v_readlane_b32 s77, v40, 41
-; MUBUF-NEXT: v_readlane_b32 s76, v40, 40
-; MUBUF-NEXT: v_readlane_b32 s75, v40, 39
-; MUBUF-NEXT: v_readlane_b32 s74, v40, 38
-; MUBUF-NEXT: v_readlane_b32 s73, v40, 37
-; MUBUF-NEXT: v_readlane_b32 s72, v40, 36
-; MUBUF-NEXT: v_readlane_b32 s71, v40, 35
-; MUBUF-NEXT: v_readlane_b32 s70, v40, 34
-; MUBUF-NEXT: v_readlane_b32 s69, v40, 33
-; MUBUF-NEXT: v_readlane_b32 s68, v40, 32
-; MUBUF-NEXT: v_readlane_b32 s67, v40, 31
-; MUBUF-NEXT: v_readlane_b32 s66, v40, 30
-; MUBUF-NEXT: v_readlane_b32 s65, v40, 29
-; MUBUF-NEXT: v_readlane_b32 s64, v40, 28
-; MUBUF-NEXT: v_readlane_b32 s63, v40, 27
-; MUBUF-NEXT: v_readlane_b32 s62, v40, 26
-; MUBUF-NEXT: v_readlane_b32 s61, v40, 25
-; MUBUF-NEXT: v_readlane_b32 s60, v40, 24
-; MUBUF-NEXT: v_readlane_b32 s59, v40, 23
-; MUBUF-NEXT: v_readlane_b32 s58, v40, 22
-; MUBUF-NEXT: v_readlane_b32 s57, v40, 21
-; MUBUF-NEXT: v_readlane_b32 s56, v40, 20
-; MUBUF-NEXT: v_readlane_b32 s55, v40, 19
-; MUBUF-NEXT: v_readlane_b32 s54, v40, 18
-; MUBUF-NEXT: v_readlane_b32 s53, v40, 17
-; MUBUF-NEXT: v_readlane_b32 s52, v40, 16
-; MUBUF-NEXT: v_readlane_b32 s51, v40, 15
-; MUBUF-NEXT: v_readlane_b32 s50, v40, 14
-; MUBUF-NEXT: v_readlane_b32 s49, v40, 13
-; MUBUF-NEXT: v_readlane_b32 s48, v40, 12
-; MUBUF-NEXT: v_readlane_b32 s47, v40, 11
-; MUBUF-NEXT: v_readlane_b32 s46, v40, 10
-; MUBUF-NEXT: v_readlane_b32 s45, v40, 9
-; MUBUF-NEXT: v_readlane_b32 s44, v40, 8
-; MUBUF-NEXT: v_readlane_b32 s43, v40, 7
-; MUBUF-NEXT: v_readlane_b32 s42, v40, 6
-; MUBUF-NEXT: v_readlane_b32 s41, v40, 5
-; MUBUF-NEXT: v_readlane_b32 s40, v40, 4
-; MUBUF-NEXT: v_readlane_b32 s39, v40, 3
-; MUBUF-NEXT: v_readlane_b32 s38, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s83, v40, 23
+; MUBUF-NEXT: v_readlane_b32 s82, v40, 22
+; MUBUF-NEXT: v_readlane_b32 s81, v40, 21
+; MUBUF-NEXT: v_readlane_b32 s80, v40, 20
+; MUBUF-NEXT: v_readlane_b32 s79, v40, 19
+; MUBUF-NEXT: v_readlane_b32 s78, v40, 18
+; MUBUF-NEXT: v_readlane_b32 s69, v40, 17
+; MUBUF-NEXT: v_readlane_b32 s68, v40, 16
+; MUBUF-NEXT: v_readlane_b32 s67, v40, 15
+; MUBUF-NEXT: v_readlane_b32 s66, v40, 14
+; MUBUF-NEXT: v_readlane_b32 s65, v40, 13
+; MUBUF-NEXT: v_readlane_b32 s64, v40, 12
+; MUBUF-NEXT: v_readlane_b32 s63, v40, 11
+; MUBUF-NEXT: v_readlane_b32 s62, v40, 10
+; MUBUF-NEXT: v_readlane_b32 s53, v40, 9
+; MUBUF-NEXT: v_readlane_b32 s52, v40, 8
+; MUBUF-NEXT: v_readlane_b32 s51, v40, 7
+; MUBUF-NEXT: v_readlane_b32 s50, v40, 6
+; MUBUF-NEXT: v_readlane_b32 s49, v40, 5
+; MUBUF-NEXT: v_readlane_b32 s48, v40, 4
+; MUBUF-NEXT: v_readlane_b32 s47, v40, 3
+; MUBUF-NEXT: v_readlane_b32 s46, v40, 2
; MUBUF-NEXT: v_readlane_b32 s37, v40, 1
; MUBUF-NEXT: v_readlane_b32 s36, v40, 0
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
@@ -409,48 +361,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
-; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
-; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3
-; FLATSCR-NEXT: v_writelane_b32 v40, s36, 4
-; FLATSCR-NEXT: v_writelane_b32 v40, s37, 5
-; FLATSCR-NEXT: v_writelane_b32 v40, s38, 6
-; FLATSCR-NEXT: v_writelane_b32 v40, s39, 7
-; FLATSCR-NEXT: v_writelane_b32 v40, s40, 8
-; FLATSCR-NEXT: v_writelane_b32 v40, s41, 9
-; FLATSCR-NEXT: v_writelane_b32 v40, s42, 10
-; FLATSCR-NEXT: v_writelane_b32 v40, s43, 11
-; FLATSCR-NEXT: v_writelane_b32 v40, s44, 12
-; FLATSCR-NEXT: v_writelane_b32 v40, s45, 13
-; FLATSCR-NEXT: v_writelane_b32 v40, s46, 14
-; FLATSCR-NEXT: v_writelane_b32 v40, s47, 15
-; FLATSCR-NEXT: v_writelane_b32 v40, s48, 16
-; FLATSCR-NEXT: v_writelane_b32 v40, s49, 17
-; FLATSCR-NEXT: v_writelane_b32 v40, s50, 18
-; FLATSCR-NEXT: v_writelane_b32 v40, s51, 19
-; FLATSCR-NEXT: v_writelane_b32 v40, s52, 20
-; FLATSCR-NEXT: v_writelane_b32 v40, s53, 21
-; FLATSCR-NEXT: v_writelane_b32 v40, s54, 22
-; FLATSCR-NEXT: v_writelane_b32 v40, s55, 23
-; FLATSCR-NEXT: v_writelane_b32 v40, s56, 24
-; FLATSCR-NEXT: v_writelane_b32 v40, s57, 25
-; FLATSCR-NEXT: v_writelane_b32 v40, s58, 26
-; FLATSCR-NEXT: v_writelane_b32 v40, s59, 27
-; FLATSCR-NEXT: v_writelane_b32 v40, s60, 28
-; FLATSCR-NEXT: v_writelane_b32 v40, s61, 29
-; FLATSCR-NEXT: v_writelane_b32 v40, s62, 30
-; FLATSCR-NEXT: v_writelane_b32 v40, s63, 31
-; FLATSCR-NEXT: v_writelane_b32 v40, s64, 32
-; FLATSCR-NEXT: v_writelane_b32 v40, s65, 33
-; FLATSCR-NEXT: v_writelane_b32 v40, s66, 34
-; FLATSCR-NEXT: v_writelane_b32 v40, s67, 35
-; FLATSCR-NEXT: v_writelane_b32 v40, s68, 36
-; FLATSCR-NEXT: v_writelane_b32 v40, s69, 37
-; FLATSCR-NEXT: v_writelane_b32 v40, s70, 38
-; FLATSCR-NEXT: v_writelane_b32 v40, s71, 39
-; FLATSCR-NEXT: v_writelane_b32 v40, s72, 40
-; FLATSCR-NEXT: v_writelane_b32 v40, s73, 41
-; FLATSCR-NEXT: v_writelane_b32 v40, s74, 42
-; FLATSCR-NEXT: v_writelane_b32 v40, s75, 43
+; FLATSCR-NEXT: v_writelane_b32 v40, s36, 2
+; FLATSCR-NEXT: v_writelane_b32 v40, s37, 3
+; FLATSCR-NEXT: v_writelane_b32 v40, s46, 4
+; FLATSCR-NEXT: v_writelane_b32 v40, s47, 5
+; FLATSCR-NEXT: v_writelane_b32 v40, s48, 6
+; FLATSCR-NEXT: v_writelane_b32 v40, s49, 7
+; FLATSCR-NEXT: v_writelane_b32 v40, s50, 8
+; FLATSCR-NEXT: v_writelane_b32 v40, s51, 9
+; FLATSCR-NEXT: v_writelane_b32 v40, s52, 10
+; FLATSCR-NEXT: v_writelane_b32 v40, s53, 11
+; FLATSCR-NEXT: v_writelane_b32 v40, s62, 12
+; FLATSCR-NEXT: v_writelane_b32 v40, s63, 13
+; FLATSCR-NEXT: v_writelane_b32 v40, s64, 14
+; FLATSCR-NEXT: v_writelane_b32 v40, s65, 15
+; FLATSCR-NEXT: v_writelane_b32 v40, s66, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s67, 17
+; FLATSCR-NEXT: v_writelane_b32 v40, s68, 18
+; FLATSCR-NEXT: v_writelane_b32 v40, s69, 19
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
@@ -477,7 +405,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; FLATSCR-NEXT: ; def s[68:75]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; def s[34:35]
+; FLATSCR-NEXT: ; def s[76:77]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s[52:67]
@@ -492,53 +420,29 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; FLATSCR-NEXT: ; use s[68:75]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s[34:35]
+; FLATSCR-NEXT: ; use s[76:77]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s[0:15]
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s75, v40, 43
-; FLATSCR-NEXT: v_readlane_b32 s74, v40, 42
-; FLATSCR-NEXT: v_readlane_b32 s73, v40, 41
-; FLATSCR-NEXT: v_readlane_b32 s72, v40, 40
-; FLATSCR-NEXT: v_readlane_b32 s71, v40, 39
-; FLATSCR-NEXT: v_readlane_b32 s70, v40, 38
-; FLATSCR-NEXT: v_readlane_b32 s69, v40, 37
-; FLATSCR-NEXT: v_readlane_b32 s68, v40, 36
-; FLATSCR-NEXT: v_readlane_b32 s67, v40, 35
-; FLATSCR-NEXT: v_readlane_b32 s66, v40, 34
-; FLATSCR-NEXT: v_readlane_b32 s65, v40, 33
-; FLATSCR-NEXT: v_readlane_b32 s64, v40, 32
-; FLATSCR-NEXT: v_readlane_b32 s63, v40, 31
-; FLATSCR-NEXT: v_readlane_b32 s62, v40, 30
-; FLATSCR-NEXT: v_readlane_b32 s61, v40, 29
-; FLATSCR-NEXT: v_readlane_b32 s60, v40, 28
-; FLATSCR-NEXT: v_readlane_b32 s59, v40, 27
-; FLATSCR-NEXT: v_readlane_b32 s58, v40, 26
-; FLATSCR-NEXT: v_readlane_b32 s57, v40, 25
-; FLATSCR-NEXT: v_readlane_b32 s56, v40, 24
-; FLATSCR-NEXT: v_readlane_b32 s55, v40, 23
-; FLATSCR-NEXT: v_readlane_b32 s54, v40, 22
-; FLATSCR-NEXT: v_readlane_b32 s53, v40, 21
-; FLATSCR-NEXT: v_readlane_b32 s52, v40, 20
-; FLATSCR-NEXT: v_readlane_b32 s51, v40, 19
-; FLATSCR-NEXT: v_readlane_b32 s50, v40, 18
-; FLATSCR-NEXT: v_readlane_b32 s49, v40, 17
-; FLATSCR-NEXT: v_readlane_b32 s48, v40, 16
-; FLATSCR-NEXT: v_readlane_b32 s47, v40, 15
-; FLATSCR-NEXT: v_readlane_b32 s46, v40, 14
-; FLATSCR-NEXT: v_readlane_b32 s45, v40, 13
-; FLATSCR-NEXT: v_readlane_b32 s44, v40, 12
-; FLATSCR-NEXT: v_readlane_b32 s43, v40, 11
-; FLATSCR-NEXT: v_readlane_b32 s42, v40, 10
-; FLATSCR-NEXT: v_readlane_b32 s41, v40, 9
-; FLATSCR-NEXT: v_readlane_b32 s40, v40, 8
-; FLATSCR-NEXT: v_readlane_b32 s39, v40, 7
-; FLATSCR-NEXT: v_readlane_b32 s38, v40, 6
-; FLATSCR-NEXT: v_readlane_b32 s37, v40, 5
-; FLATSCR-NEXT: v_readlane_b32 s36, v40, 4
-; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3
-; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
+; FLATSCR-NEXT: v_readlane_b32 s69, v40, 19
+; FLATSCR-NEXT: v_readlane_b32 s68, v40, 18
+; FLATSCR-NEXT: v_readlane_b32 s67, v40, 17
+; FLATSCR-NEXT: v_readlane_b32 s66, v40, 16
+; FLATSCR-NEXT: v_readlane_b32 s65, v40, 15
+; FLATSCR-NEXT: v_readlane_b32 s64, v40, 14
+; FLATSCR-NEXT: v_readlane_b32 s63, v40, 13
+; FLATSCR-NEXT: v_readlane_b32 s62, v40, 12
+; FLATSCR-NEXT: v_readlane_b32 s53, v40, 11
+; FLATSCR-NEXT: v_readlane_b32 s52, v40, 10
+; FLATSCR-NEXT: v_readlane_b32 s51, v40, 9
+; FLATSCR-NEXT: v_readlane_b32 s50, v40, 8
+; FLATSCR-NEXT: v_readlane_b32 s49, v40, 7
+; FLATSCR-NEXT: v_readlane_b32 s48, v40, 6
+; FLATSCR-NEXT: v_readlane_b32 s47, v40, 5
+; FLATSCR-NEXT: v_readlane_b32 s46, v40, 4
+; FLATSCR-NEXT: v_readlane_b32 s37, v40, 3
+; FLATSCR-NEXT: v_readlane_b32 s36, v40, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -571,39 +475,13 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
; enable all lanes and restore.
define void @spill_only_csr_sgpr() {
-; MUBUF-LABEL: spill_only_csr_sgpr:
-; MUBUF: ; %bb.0:
-; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: v_writelane_b32 v0, s42, 0
-; MUBUF-NEXT: ;;#ASMSTART
-; MUBUF-NEXT: ; clobber s42
-; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s42, v0, 0
-; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: s_setpc_b64 s[30:31]
-;
-; FLATSCR-LABEL: spill_only_csr_sgpr:
-; FLATSCR: ; %bb.0:
-; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
-; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: v_writelane_b32 v0, s42, 0
-; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; clobber s42
-; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s42, v0, 0
-; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
-; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: spill_only_csr_sgpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; clobber s42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber s42", "~{s42}"()
ret void
}
@@ -663,143 +541,83 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v1, s40, 0
-; MUBUF-NEXT: v_writelane_b32 v1, s41, 1
-; MUBUF-NEXT: v_writelane_b32 v1, s42, 2
-; MUBUF-NEXT: v_writelane_b32 v1, s43, 3
-; MUBUF-NEXT: v_writelane_b32 v1, s44, 4
-; MUBUF-NEXT: v_writelane_b32 v1, s45, 5
-; MUBUF-NEXT: v_writelane_b32 v1, s46, 6
-; MUBUF-NEXT: v_writelane_b32 v1, s47, 7
-; MUBUF-NEXT: v_writelane_b32 v1, s48, 8
-; MUBUF-NEXT: v_writelane_b32 v1, s49, 9
-; MUBUF-NEXT: v_writelane_b32 v1, s50, 10
-; MUBUF-NEXT: v_writelane_b32 v1, s51, 11
-; MUBUF-NEXT: v_writelane_b32 v1, s52, 12
-; MUBUF-NEXT: v_writelane_b32 v1, s53, 13
-; MUBUF-NEXT: v_writelane_b32 v1, s54, 14
-; MUBUF-NEXT: v_writelane_b32 v1, s55, 15
-; MUBUF-NEXT: v_writelane_b32 v1, s56, 16
-; MUBUF-NEXT: v_writelane_b32 v1, s57, 17
-; MUBUF-NEXT: v_writelane_b32 v1, s58, 18
-; MUBUF-NEXT: v_writelane_b32 v1, s59, 19
-; MUBUF-NEXT: v_writelane_b32 v1, s60, 20
-; MUBUF-NEXT: v_writelane_b32 v1, s61, 21
-; MUBUF-NEXT: v_writelane_b32 v1, s62, 22
-; MUBUF-NEXT: v_writelane_b32 v1, s63, 23
-; MUBUF-NEXT: v_writelane_b32 v1, s64, 24
-; MUBUF-NEXT: v_writelane_b32 v1, s65, 25
-; MUBUF-NEXT: v_writelane_b32 v1, s66, 26
-; MUBUF-NEXT: v_writelane_b32 v1, s67, 27
-; MUBUF-NEXT: v_writelane_b32 v1, s68, 28
-; MUBUF-NEXT: v_writelane_b32 v1, s69, 29
-; MUBUF-NEXT: v_writelane_b32 v1, s70, 30
-; MUBUF-NEXT: v_writelane_b32 v1, s71, 31
-; MUBUF-NEXT: v_writelane_b32 v1, s72, 32
-; MUBUF-NEXT: v_writelane_b32 v1, s73, 33
-; MUBUF-NEXT: v_writelane_b32 v1, s74, 34
-; MUBUF-NEXT: v_writelane_b32 v1, s75, 35
-; MUBUF-NEXT: v_writelane_b32 v1, s76, 36
-; MUBUF-NEXT: v_writelane_b32 v1, s77, 37
-; MUBUF-NEXT: v_writelane_b32 v1, s78, 38
-; MUBUF-NEXT: v_writelane_b32 v1, s79, 39
-; MUBUF-NEXT: v_writelane_b32 v1, s80, 40
-; MUBUF-NEXT: v_writelane_b32 v1, s81, 41
-; MUBUF-NEXT: v_writelane_b32 v1, s82, 42
-; MUBUF-NEXT: v_writelane_b32 v1, s83, 43
-; MUBUF-NEXT: v_writelane_b32 v1, s84, 44
-; MUBUF-NEXT: v_writelane_b32 v1, s85, 45
-; MUBUF-NEXT: v_writelane_b32 v1, s86, 46
-; MUBUF-NEXT: v_writelane_b32 v1, s87, 47
-; MUBUF-NEXT: v_writelane_b32 v1, s88, 48
-; MUBUF-NEXT: v_writelane_b32 v1, s89, 49
-; MUBUF-NEXT: v_writelane_b32 v1, s90, 50
-; MUBUF-NEXT: v_writelane_b32 v1, s91, 51
-; MUBUF-NEXT: v_writelane_b32 v1, s92, 52
-; MUBUF-NEXT: v_writelane_b32 v1, s93, 53
-; MUBUF-NEXT: v_writelane_b32 v1, s94, 54
-; MUBUF-NEXT: v_writelane_b32 v1, s95, 55
-; MUBUF-NEXT: v_writelane_b32 v1, s96, 56
-; MUBUF-NEXT: v_writelane_b32 v1, s97, 57
-; MUBUF-NEXT: v_writelane_b32 v1, s98, 58
-; MUBUF-NEXT: v_writelane_b32 v1, s99, 59
-; MUBUF-NEXT: v_writelane_b32 v1, s100, 60
+; MUBUF-NEXT: v_writelane_b32 v1, s46, 0
+; MUBUF-NEXT: v_writelane_b32 v1, s47, 1
+; MUBUF-NEXT: v_writelane_b32 v1, s48, 2
+; MUBUF-NEXT: v_writelane_b32 v1, s49, 3
+; MUBUF-NEXT: v_writelane_b32 v1, s50, 4
+; MUBUF-NEXT: v_writelane_b32 v1, s51, 5
+; MUBUF-NEXT: v_writelane_b32 v1, s52, 6
+; MUBUF-NEXT: v_writelane_b32 v1, s53, 7
+; MUBUF-NEXT: v_writelane_b32 v1, s62, 8
+; MUBUF-NEXT: v_writelane_b32 v1, s63, 9
+; MUBUF-NEXT: v_writelane_b32 v1, s64, 10
+; MUBUF-NEXT: v_writelane_b32 v1, s65, 11
+; MUBUF-NEXT: v_writelane_b32 v1, s66, 12
+; MUBUF-NEXT: v_writelane_b32 v1, s67, 13
+; MUBUF-NEXT: v_writelane_b32 v1, s68, 14
+; MUBUF-NEXT: v_writelane_b32 v1, s69, 15
+; MUBUF-NEXT: v_writelane_b32 v1, s78, 16
+; MUBUF-NEXT: v_writelane_b32 v1, s79, 17
+; MUBUF-NEXT: v_writelane_b32 v1, s80, 18
+; MUBUF-NEXT: v_writelane_b32 v1, s81, 19
+; MUBUF-NEXT: v_writelane_b32 v1, s82, 20
+; MUBUF-NEXT: v_writelane_b32 v1, s83, 21
+; MUBUF-NEXT: v_writelane_b32 v1, s84, 22
+; MUBUF-NEXT: v_writelane_b32 v1, s85, 23
+; MUBUF-NEXT: v_writelane_b32 v1, s94, 24
+; MUBUF-NEXT: v_writelane_b32 v1, s95, 25
+; MUBUF-NEXT: v_writelane_b32 v1, s96, 26
+; MUBUF-NEXT: v_writelane_b32 v1, s97, 27
+; MUBUF-NEXT: v_writelane_b32 v1, s98, 28
+; MUBUF-NEXT: v_writelane_b32 v1, s99, 29
+; MUBUF-NEXT: v_writelane_b32 v1, s100, 30
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v1, s101, 61
+; MUBUF-NEXT: v_writelane_b32 v1, s101, 31
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber v41
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_writelane_b32 v1, s102, 62
+; MUBUF-NEXT: v_writelane_b32 v1, s102, 32
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_addk_i32 s32, 0x400
-; MUBUF-NEXT: v_readlane_b32 s102, v1, 62
-; MUBUF-NEXT: v_readlane_b32 s101, v1, 61
-; MUBUF-NEXT: v_readlane_b32 s100, v1, 60
-; MUBUF-NEXT: v_readlane_b32 s99, v1, 59
-; MUBUF-NEXT: v_readlane_b32 s98, v1, 58
-; MUBUF-NEXT: v_readlane_b32 s97, v1, 57
-; MUBUF-NEXT: v_readlane_b32 s96, v1, 56
-; MUBUF-NEXT: v_readlane_b32 s95, v1, 55
-; MUBUF-NEXT: v_readlane_b32 s94, v1, 54
-; MUBUF-NEXT: v_readlane_b32 s93, v1, 53
-; MUBUF-NEXT: v_readlane_b32 s92, v1, 52
-; MUBUF-NEXT: v_readlane_b32 s91, v1, 51
-; MUBUF-NEXT: v_readlane_b32 s90, v1, 50
-; MUBUF-NEXT: v_readlane_b32 s89, v1, 49
-; MUBUF-NEXT: v_readlane_b32 s88, v1, 48
-; MUBUF-NEXT: v_readlane_b32 s87, v1, 47
-; MUBUF-NEXT: v_readlane_b32 s86, v1, 46
-; MUBUF-NEXT: v_readlane_b32 s85, v1, 45
-; MUBUF-NEXT: v_readlane_b32 s84, v1, 44
-; MUBUF-NEXT: v_readlane_b32 s83, v1, 43
-; MUBUF-NEXT: v_readlane_b32 s82, v1, 42
-; MUBUF-NEXT: v_readlane_b32 s81, v1, 41
-; MUBUF-NEXT: v_readlane_b32 s80, v1, 40
-; MUBUF-NEXT: v_readlane_b32 s79, v1, 39
-; MUBUF-NEXT: v_readlane_b32 s78, v1, 38
-; MUBUF-NEXT: v_readlane_b32 s77, v1, 37
-; MUBUF-NEXT: v_readlane_b32 s76, v1, 36
-; MUBUF-NEXT: v_readlane_b32 s75, v1, 35
-; MUBUF-NEXT: v_readlane_b32 s74, v1, 34
-; MUBUF-NEXT: v_readlane_b32 s73, v1, 33
-; MUBUF-NEXT: v_readlane_b32 s72, v1, 32
-; MUBUF-NEXT: v_readlane_b32 s71, v1, 31
-; MUBUF-NEXT: v_readlane_b32 s70, v1, 30
-; MUBUF-NEXT: v_readlane_b32 s69, v1, 29
-; MUBUF-NEXT: v_readlane_b32 s68, v1, 28
-; MUBUF-NEXT: v_readlane_b32 s67, v1, 27
-; MUBUF-NEXT: v_readlane_b32 s66, v1, 26
-; MUBUF-NEXT: v_readlane_b32 s65, v1, 25
-; MUBUF-NEXT: v_readlane_b32 s64, v1, 24
-; MUBUF-NEXT: v_readlane_b32 s63, v1, 23
-; MUBUF-NEXT: v_readlane_b32 s62, v1, 22
-; MUBUF-NEXT: v_readlane_b32 s61, v1, 21
-; MUBUF-NEXT: v_readlane_b32 s60, v1, 20
-; MUBUF-NEXT: v_readlane_b32 s59, v1, 19
-; MUBUF-NEXT: v_readlane_b32 s58, v1, 18
-; MUBUF-NEXT: v_readlane_b32 s57, v1, 17
-; MUBUF-NEXT: v_readlane_b32 s56, v1, 16
-; MUBUF-NEXT: v_readlane_b32 s55, v1, 15
-; MUBUF-NEXT: v_readlane_b32 s54, v1, 14
-; MUBUF-NEXT: v_readlane_b32 s53, v1, 13
-; MUBUF-NEXT: v_readlane_b32 s52, v1, 12
-; MUBUF-NEXT: v_readlane_b32 s51, v1, 11
-; MUBUF-NEXT: v_readlane_b32 s50, v1, 10
-; MUBUF-NEXT: v_readlane_b32 s49, v1, 9
-; MUBUF-NEXT: v_readlane_b32 s48, v1, 8
-; MUBUF-NEXT: v_readlane_b32 s47, v1, 7
-; MUBUF-NEXT: v_readlane_b32 s46, v1, 6
-; MUBUF-NEXT: v_readlane_b32 s45, v1, 5
-; MUBUF-NEXT: v_readlane_b32 s44, v1, 4
-; MUBUF-NEXT: v_readlane_b32 s43, v1, 3
-; MUBUF-NEXT: v_readlane_b32 s42, v1, 2
-; MUBUF-NEXT: v_readlane_b32 s41, v1, 1
-; MUBUF-NEXT: v_readlane_b32 s40, v1, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v1, 32
+; MUBUF-NEXT: v_readlane_b32 s101, v1, 31
+; MUBUF-NEXT: v_readlane_b32 s100, v1, 30
+; MUBUF-NEXT: v_readlane_b32 s99, v1, 29
+; MUBUF-NEXT: v_readlane_b32 s98, v1, 28
+; MUBUF-NEXT: v_readlane_b32 s97, v1, 27
+; MUBUF-NEXT: v_readlane_b32 s96, v1, 26
+; MUBUF-NEXT: v_readlane_b32 s95, v1, 25
+; MUBUF-NEXT: v_readlane_b32 s94, v1, 24
+; MUBUF-NEXT: v_readlane_b32 s85, v1, 23
+; MUBUF-NEXT: v_readlane_b32 s84, v1, 22
+; MUBUF-NEXT: v_readlane_b32 s83, v1, 21
+; MUBUF-NEXT: v_readlane_b32 s82, v1, 20
+; MUBUF-NEXT: v_readlane_b32 s81, v1, 19
+; MUBUF-NEXT: v_readlane_b32 s80, v1, 18
+; MUBUF-NEXT: v_readlane_b32 s79, v1, 17
+; MUBUF-NEXT: v_readlane_b32 s78, v1, 16
+; MUBUF-NEXT: v_readlane_b32 s69, v1, 15
+; MUBUF-NEXT: v_readlane_b32 s68, v1, 14
+; MUBUF-NEXT: v_readlane_b32 s67, v1, 13
+; MUBUF-NEXT: v_readlane_b32 s66, v1, 12
+; MUBUF-NEXT: v_readlane_b32 s65, v1, 11
+; MUBUF-NEXT: v_readlane_b32 s64, v1, 10
+; MUBUF-NEXT: v_readlane_b32 s63, v1, 9
+; MUBUF-NEXT: v_readlane_b32 s62, v1, 8
+; MUBUF-NEXT: v_readlane_b32 s53, v1, 7
+; MUBUF-NEXT: v_readlane_b32 s52, v1, 6
+; MUBUF-NEXT: v_readlane_b32 s51, v1, 5
+; MUBUF-NEXT: v_readlane_b32 s50, v1, 4
+; MUBUF-NEXT: v_readlane_b32 s49, v1, 3
+; MUBUF-NEXT: v_readlane_b32 s48, v1, 2
+; MUBUF-NEXT: v_readlane_b32 s47, v1, 1
+; MUBUF-NEXT: v_readlane_b32 s46, v1, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
@@ -816,143 +634,83 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v1, s40, 0
-; FLATSCR-NEXT: v_writelane_b32 v1, s41, 1
-; FLATSCR-NEXT: v_writelane_b32 v1, s42, 2
-; FLATSCR-NEXT: v_writelane_b32 v1, s43, 3
-; FLATSCR-NEXT: v_writelane_b32 v1, s44, 4
-; FLATSCR-NEXT: v_writelane_b32 v1, s45, 5
-; FLATSCR-NEXT: v_writelane_b32 v1, s46, 6
-; FLATSCR-NEXT: v_writelane_b32 v1, s47, 7
-; FLATSCR-NEXT: v_writelane_b32 v1, s48, 8
-; FLATSCR-NEXT: v_writelane_b32 v1, s49, 9
-; FLATSCR-NEXT: v_writelane_b32 v1, s50, 10
-; FLATSCR-NEXT: v_writelane_b32 v1, s51, 11
-; FLATSCR-NEXT: v_writelane_b32 v1, s52, 12
-; FLATSCR-NEXT: v_writelane_b32 v1, s53, 13
-; FLATSCR-NEXT: v_writelane_b32 v1, s54, 14
-; FLATSCR-NEXT: v_writelane_b32 v1, s55, 15
-; FLATSCR-NEXT: v_writelane_b32 v1, s56, 16
-; FLATSCR-NEXT: v_writelane_b32 v1, s57, 17
-; FLATSCR-NEXT: v_writelane_b32 v1, s58, 18
-; FLATSCR-NEXT: v_writelane_b32 v1, s59, 19
-; FLATSCR-NEXT: v_writelane_b32 v1, s60, 20
-; FLATSCR-NEXT: v_writelane_b32 v1, s61, 21
-; FLATSCR-NEXT: v_writelane_b32 v1, s62, 22
-; FLATSCR-NEXT: v_writelane_b32 v1, s63, 23
-; FLATSCR-NEXT: v_writelane_b32 v1, s64, 24
-; FLATSCR-NEXT: v_writelane_b32 v1, s65, 25
-; FLATSCR-NEXT: v_writelane_b32 v1, s66, 26
-; FLATSCR-NEXT: v_writelane_b32 v1, s67, 27
-; FLATSCR-NEXT: v_writelane_b32 v1, s68, 28
-; FLATSCR-NEXT: v_writelane_b32 v1, s69, 29
-; FLATSCR-NEXT: v_writelane_b32 v1, s70, 30
-; FLATSCR-NEXT: v_writelane_b32 v1, s71, 31
-; FLATSCR-NEXT: v_writelane_b32 v1, s72, 32
-; FLATSCR-NEXT: v_writelane_b32 v1, s73, 33
-; FLATSCR-NEXT: v_writelane_b32 v1, s74, 34
-; FLATSCR-NEXT: v_writelane_b32 v1, s75, 35
-; FLATSCR-NEXT: v_writelane_b32 v1, s76, 36
-; FLATSCR-NEXT: v_writelane_b32 v1, s77, 37
-; FLATSCR-NEXT: v_writelane_b32 v1, s78, 38
-; FLATSCR-NEXT: v_writelane_b32 v1, s79, 39
-; FLATSCR-NEXT: v_writelane_b32 v1, s80, 40
-; FLATSCR-NEXT: v_writelane_b32 v1, s81, 41
-; FLATSCR-NEXT: v_writelane_b32 v1, s82, 42
-; FLATSCR-NEXT: v_writelane_b32 v1, s83, 43
-; FLATSCR-NEXT: v_writelane_b32 v1, s84, 44
-; FLATSCR-NEXT: v_writelane_b32 v1, s85, 45
-; FLATSCR-NEXT: v_writelane_b32 v1, s86, 46
-; FLATSCR-NEXT: v_writelane_b32 v1, s87, 47
-; FLATSCR-NEXT: v_writelane_b32 v1, s88, 48
-; FLATSCR-NEXT: v_writelane_b32 v1, s89, 49
-; FLATSCR-NEXT: v_writelane_b32 v1, s90, 50
-; FLATSCR-NEXT: v_writelane_b32 v1, s91, 51
-; FLATSCR-NEXT: v_writelane_b32 v1, s92, 52
-; FLATSCR-NEXT: v_writelane_b32 v1, s93, 53
-; FLATSCR-NEXT: v_writelane_b32 v1, s94, 54
-; FLATSCR-NEXT: v_writelane_b32 v1, s95, 55
-; FLATSCR-NEXT: v_writelane_b32 v1, s96, 56
-; FLATSCR-NEXT: v_writelane_b32 v1, s97, 57
-; FLATSCR-NEXT: v_writelane_b32 v1, s98, 58
-; FLATSCR-NEXT: v_writelane_b32 v1, s99, 59
-; FLATSCR-NEXT: v_writelane_b32 v1, s100, 60
+; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0
+; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1
+; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2
+; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3
+; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4
+; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5
+; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6
+; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7
+; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8
+; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9
+; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10
+; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11
+; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12
+; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13
+; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14
+; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15
+; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16
+; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17
+; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18
+; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19
+; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20
+; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21
+; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22
+; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23
+; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24
+; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25
+; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26
+; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27
+; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28
+; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29
+; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: v_writelane_b32 v1, s101, 61
+; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31
; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber v41
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_writelane_b32 v1, s102, 62
+; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
-; FLATSCR-NEXT: v_readlane_b32 s102, v1, 62
-; FLATSCR-NEXT: v_readlane_b32 s101, v1, 61
-; FLATSCR-NEXT: v_readlane_b32 s100, v1, 60
-; FLATSCR-NEXT: v_readlane_b32 s99, v1, 59
-; FLATSCR-NEXT: v_readlane_b32 s98, v1, 58
-; FLATSCR-NEXT: v_readlane_b32 s97, v1, 57
-; FLATSCR-NEXT: v_readlane_b32 s96, v1, 56
-; FLATSCR-NEXT: v_readlane_b32 s95, v1, 55
-; FLATSCR-NEXT: v_readlane_b32 s94, v1, 54
-; FLATSCR-NEXT: v_readlane_b32 s93, v1, 53
-; FLATSCR-NEXT: v_readlane_b32 s92, v1, 52
-; FLATSCR-NEXT: v_readlane_b32 s91, v1, 51
-; FLATSCR-NEXT: v_readlane_b32 s90, v1, 50
-; FLATSCR-NEXT: v_readlane_b32 s89, v1, 49
-; FLATSCR-NEXT: v_readlane_b32 s88, v1, 48
-; FLATSCR-NEXT: v_readlane_b32 s87, v1, 47
-; FLATSCR-NEXT: v_readlane_b32 s86, v1, 46
-; FLATSCR-NEXT: v_readlane_b32 s85, v1, 45
-; FLATSCR-NEXT: v_readlane_b32 s84, v1, 44
-; FLATSCR-NEXT: v_readlane_b32 s83, v1, 43
-; FLATSCR-NEXT: v_readlane_b32 s82, v1, 42
-; FLATSCR-NEXT: v_readlane_b32 s81, v1, 41
-; FLATSCR-NEXT: v_readlane_b32 s80, v1, 40
-; FLATSCR-NEXT: v_readlane_b32 s79, v1, 39
-; FLATSCR-NEXT: v_readlane_b32 s78, v1, 38
-; FLATSCR-NEXT: v_readlane_b32 s77, v1, 37
-; FLATSCR-NEXT: v_readlane_b32 s76, v1, 36
-; FLATSCR-NEXT: v_readlane_b32 s75, v1, 35
-; FLATSCR-NEXT: v_readlane_b32 s74, v1, 34
-; FLATSCR-NEXT: v_readlane_b32 s73, v1, 33
-; FLATSCR-NEXT: v_readlane_b32 s72, v1, 32
-; FLATSCR-NEXT: v_readlane_b32 s71, v1, 31
-; FLATSCR-NEXT: v_readlane_b32 s70, v1, 30
-; FLATSCR-NEXT: v_readlane_b32 s69, v1, 29
-; FLATSCR-NEXT: v_readlane_b32 s68, v1, 28
-; FLATSCR-NEXT: v_readlane_b32 s67, v1, 27
-; FLATSCR-NEXT: v_readlane_b32 s66, v1, 26
-; FLATSCR-NEXT: v_readlane_b32 s65, v1, 25
-; FLATSCR-NEXT: v_readlane_b32 s64, v1, 24
-; FLATSCR-NEXT: v_readlane_b32 s63, v1, 23
-; FLATSCR-NEXT: v_readlane_b32 s62, v1, 22
-; FLATSCR-NEXT: v_readlane_b32 s61, v1, 21
-; FLATSCR-NEXT: v_readlane_b32 s60, v1, 20
-; FLATSCR-NEXT: v_readlane_b32 s59, v1, 19
-; FLATSCR-NEXT: v_readlane_b32 s58, v1, 18
-; FLATSCR-NEXT: v_readlane_b32 s57, v1, 17
-; FLATSCR-NEXT: v_readlane_b32 s56, v1, 16
-; FLATSCR-NEXT: v_readlane_b32 s55, v1, 15
-; FLATSCR-NEXT: v_readlane_b32 s54, v1, 14
-; FLATSCR-NEXT: v_readlane_b32 s53, v1, 13
-; FLATSCR-NEXT: v_readlane_b32 s52, v1, 12
-; FLATSCR-NEXT: v_readlane_b32 s51, v1, 11
-; FLATSCR-NEXT: v_readlane_b32 s50, v1, 10
-; FLATSCR-NEXT: v_readlane_b32 s49, v1, 9
-; FLATSCR-NEXT: v_readlane_b32 s48, v1, 8
-; FLATSCR-NEXT: v_readlane_b32 s47, v1, 7
-; FLATSCR-NEXT: v_readlane_b32 s46, v1, 6
-; FLATSCR-NEXT: v_readlane_b32 s45, v1, 5
-; FLATSCR-NEXT: v_readlane_b32 s44, v1, 4
-; FLATSCR-NEXT: v_readlane_b32 s43, v1, 3
-; FLATSCR-NEXT: v_readlane_b32 s42, v1, 2
-; FLATSCR-NEXT: v_readlane_b32 s41, v1, 1
-; FLATSCR-NEXT: v_readlane_b32 s40, v1, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32
+; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31
+; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30
+; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29
+; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28
+; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27
+; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26
+; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25
+; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24
+; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23
+; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22
+; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21
+; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20
+; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19
+; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18
+; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17
+; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16
+; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15
+; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14
+; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13
+; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12
+; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11
+; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10
+; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9
+; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8
+; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7
+; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6
+; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5
+; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4
+; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3
+; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2
+; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1
+; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload
@@ -985,145 +743,83 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v1, s39, 0
-; MUBUF-NEXT: v_writelane_b32 v1, s40, 1
-; MUBUF-NEXT: v_writelane_b32 v1, s41, 2
-; MUBUF-NEXT: v_writelane_b32 v1, s42, 3
-; MUBUF-NEXT: v_writelane_b32 v1, s43, 4
-; MUBUF-NEXT: v_writelane_b32 v1, s44, 5
-; MUBUF-NEXT: v_writelane_b32 v1, s45, 6
-; MUBUF-NEXT: v_writelane_b32 v1, s46, 7
-; MUBUF-NEXT: v_writelane_b32 v1, s47, 8
-; MUBUF-NEXT: v_writelane_b32 v1, s48, 9
-; MUBUF-NEXT: v_writelane_b32 v1, s49, 10
-; MUBUF-NEXT: v_writelane_b32 v1, s50, 11
-; MUBUF-NEXT: v_writelane_b32 v1, s51, 12
-; MUBUF-NEXT: v_writelane_b32 v1, s52, 13
-; MUBUF-NEXT: v_writelane_b32 v1, s53, 14
-; MUBUF-NEXT: v_writelane_b32 v1, s54, 15
-; MUBUF-NEXT: v_writelane_b32 v1, s55, 16
-; MUBUF-NEXT: v_writelane_b32 v1, s56, 17
-; MUBUF-NEXT: v_writelane_b32 v1, s57, 18
-; MUBUF-NEXT: v_writelane_b32 v1, s58, 19
-; MUBUF-NEXT: v_writelane_b32 v1, s59, 20
-; MUBUF-NEXT: v_writelane_b32 v1, s60, 21
-; MUBUF-NEXT: v_writelane_b32 v1, s61, 22
-; MUBUF-NEXT: v_writelane_b32 v1, s62, 23
-; MUBUF-NEXT: v_writelane_b32 v1, s63, 24
-; MUBUF-NEXT: v_writelane_b32 v1, s64, 25
-; MUBUF-NEXT: v_writelane_b32 v1, s65, 26
-; MUBUF-NEXT: v_writelane_b32 v1, s66, 27
-; MUBUF-NEXT: v_writelane_b32 v1, s67, 28
-; MUBUF-NEXT: v_writelane_b32 v1, s68, 29
-; MUBUF-NEXT: v_writelane_b32 v1, s69, 30
-; MUBUF-NEXT: v_writelane_b32 v1, s70, 31
-; MUBUF-NEXT: v_writelane_b32 v1, s71, 32
-; MUBUF-NEXT: v_writelane_b32 v1, s72, 33
-; MUBUF-NEXT: v_writelane_b32 v1, s73, 34
-; MUBUF-NEXT: v_writelane_b32 v1, s74, 35
-; MUBUF-NEXT: v_writelane_b32 v1, s75, 36
-; MUBUF-NEXT: v_writelane_b32 v1, s76, 37
-; MUBUF-NEXT: v_writelane_b32 v1, s77, 38
-; MUBUF-NEXT: v_writelane_b32 v1, s78, 39
-; MUBUF-NEXT: v_writelane_b32 v1, s79, 40
-; MUBUF-NEXT: v_writelane_b32 v1, s80, 41
-; MUBUF-NEXT: v_writelane_b32 v1, s81, 42
-; MUBUF-NEXT: v_writelane_b32 v1, s82, 43
-; MUBUF-NEXT: v_writelane_b32 v1, s83, 44
-; MUBUF-NEXT: v_writelane_b32 v1, s84, 45
-; MUBUF-NEXT: v_writelane_b32 v1, s85, 46
-; MUBUF-NEXT: v_writelane_b32 v1, s86, 47
-; MUBUF-NEXT: v_writelane_b32 v1, s87, 48
-; MUBUF-NEXT: v_writelane_b32 v1, s88, 49
-; MUBUF-NEXT: v_writelane_b32 v1, s89, 50
-; MUBUF-NEXT: v_writelane_b32 v1, s90, 51
-; MUBUF-NEXT: v_writelane_b32 v1, s91, 52
-; MUBUF-NEXT: v_writelane_b32 v1, s92, 53
-; MUBUF-NEXT: v_writelane_b32 v1, s93, 54
-; MUBUF-NEXT: v_writelane_b32 v1, s94, 55
-; MUBUF-NEXT: v_writelane_b32 v1, s95, 56
-; MUBUF-NEXT: v_writelane_b32 v1, s96, 57
-; MUBUF-NEXT: v_writelane_b32 v1, s97, 58
-; MUBUF-NEXT: v_writelane_b32 v1, s98, 59
-; MUBUF-NEXT: v_writelane_b32 v1, s99, 60
-; MUBUF-NEXT: v_writelane_b32 v1, s100, 61
+; MUBUF-NEXT: v_writelane_b32 v1, s46, 0
+; MUBUF-NEXT: v_writelane_b32 v1, s47, 1
+; MUBUF-NEXT: v_writelane_b32 v1, s48, 2
+; MUBUF-NEXT: v_writelane_b32 v1, s49, 3
+; MUBUF-NEXT: v_writelane_b32 v1, s50, 4
+; MUBUF-NEXT: v_writelane_b32 v1, s51, 5
+; MUBUF-NEXT: v_writelane_b32 v1, s52, 6
+; MUBUF-NEXT: v_writelane_b32 v1, s53, 7
+; MUBUF-NEXT: v_writelane_b32 v1, s62, 8
+; MUBUF-NEXT: v_writelane_b32 v1, s63, 9
+; MUBUF-NEXT: v_writelane_b32 v1, s64, 10
+; MUBUF-NEXT: v_writelane_b32 v1, s65, 11
+; MUBUF-NEXT: v_writelane_b32 v1, s66, 12
+; MUBUF-NEXT: v_writelane_b32 v1, s67, 13
+; MUBUF-NEXT: v_writelane_b32 v1, s68, 14
+; MUBUF-NEXT: v_writelane_b32 v1, s69, 15
+; MUBUF-NEXT: v_writelane_b32 v1, s78, 16
+; MUBUF-NEXT: v_writelane_b32 v1, s79, 17
+; MUBUF-NEXT: v_writelane_b32 v1, s80, 18
+; MUBUF-NEXT: v_writelane_b32 v1, s81, 19
+; MUBUF-NEXT: v_writelane_b32 v1, s82, 20
+; MUBUF-NEXT: v_writelane_b32 v1, s83, 21
+; MUBUF-NEXT: v_writelane_b32 v1, s84, 22
+; MUBUF-NEXT: v_writelane_b32 v1, s85, 23
+; MUBUF-NEXT: v_writelane_b32 v1, s94, 24
+; MUBUF-NEXT: v_writelane_b32 v1, s95, 25
+; MUBUF-NEXT: v_writelane_b32 v1, s96, 26
+; MUBUF-NEXT: v_writelane_b32 v1, s97, 27
+; MUBUF-NEXT: v_writelane_b32 v1, s98, 28
+; MUBUF-NEXT: v_writelane_b32 v1, s99, 29
+; MUBUF-NEXT: v_writelane_b32 v1, s100, 30
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v1, s101, 62
+; MUBUF-NEXT: v_writelane_b32 v1, s101, 31
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber v41
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_writelane_b32 v1, s102, 63
+; MUBUF-NEXT: v_writelane_b32 v1, s102, 32
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_addk_i32 s32, 0x400
-; MUBUF-NEXT: v_readlane_b32 s102, v1, 63
-; MUBUF-NEXT: v_readlane_b32 s101, v1, 62
-; MUBUF-NEXT: v_readlane_b32 s100, v1, 61
-; MUBUF-NEXT: v_readlane_b32 s99, v1, 60
-; MUBUF-NEXT: v_readlane_b32 s98, v1, 59
-; MUBUF-NEXT: v_readlane_b32 s97, v1, 58
-; MUBUF-NEXT: v_readlane_b32 s96, v1, 57
-; MUBUF-NEXT: v_readlane_b32 s95, v1, 56
-; MUBUF-NEXT: v_readlane_b32 s94, v1, 55
-; MUBUF-NEXT: v_readlane_b32 s93, v1, 54
-; MUBUF-NEXT: v_readlane_b32 s92, v1, 53
-; MUBUF-NEXT: v_readlane_b32 s91, v1, 52
-; MUBUF-NEXT: v_readlane_b32 s90, v1, 51
-; MUBUF-NEXT: v_readlane_b32 s89, v1, 50
-; MUBUF-NEXT: v_readlane_b32 s88, v1, 49
-; MUBUF-NEXT: v_readlane_b32 s87, v1, 48
-; MUBUF-NEXT: v_readlane_b32 s86, v1, 47
-; MUBUF-NEXT: v_readlane_b32 s85, v1, 46
-; MUBUF-NEXT: v_readlane_b32 s84, v1, 45
-; MUBUF-NEXT: v_readlane_b32 s83, v1, 44
-; MUBUF-NEXT: v_readlane_b32 s82, v1, 43
-; MUBUF-NEXT: v_readlane_b32 s81, v1, 42
-; MUBUF-NEXT: v_readlane_b32 s80, v1, 41
-; MUBUF-NEXT: v_readlane_b32 s79, v1, 40
-; MUBUF-NEXT: v_readlane_b32 s78, v1, 39
-; MUBUF-NEXT: v_readlane_b32 s77, v1, 38
-; MUBUF-NEXT: v_readlane_b32 s76, v1, 37
-; MUBUF-NEXT: v_readlane_b32 s75, v1, 36
-; MUBUF-NEXT: v_readlane_b32 s74, v1, 35
-; MUBUF-NEXT: v_readlane_b32 s73, v1, 34
-; MUBUF-NEXT: v_readlane_b32 s72, v1, 33
-; MUBUF-NEXT: v_readlane_b32 s71, v1, 32
-; MUBUF-NEXT: v_readlane_b32 s70, v1, 31
-; MUBUF-NEXT: v_readlane_b32 s69, v1, 30
-; MUBUF-NEXT: v_readlane_b32 s68, v1, 29
-; MUBUF-NEXT: v_readlane_b32 s67, v1, 28
-; MUBUF-NEXT: v_readlane_b32 s66, v1, 27
-; MUBUF-NEXT: v_readlane_b32 s65, v1, 26
-; MUBUF-NEXT: v_readlane_b32 s64, v1, 25
-; MUBUF-NEXT: v_readlane_b32 s63, v1, 24
-; MUBUF-NEXT: v_readlane_b32 s62, v1, 23
-; MUBUF-NEXT: v_readlane_b32 s61, v1, 22
-; MUBUF-NEXT: v_readlane_b32 s60, v1, 21
-; MUBUF-NEXT: v_readlane_b32 s59, v1, 20
-; MUBUF-NEXT: v_readlane_b32 s58, v1, 19
-; MUBUF-NEXT: v_readlane_b32 s57, v1, 18
-; MUBUF-NEXT: v_readlane_b32 s56, v1, 17
-; MUBUF-NEXT: v_readlane_b32 s55, v1, 16
-; MUBUF-NEXT: v_readlane_b32 s54, v1, 15
-; MUBUF-NEXT: v_readlane_b32 s53, v1, 14
-; MUBUF-NEXT: v_readlane_b32 s52, v1, 13
-; MUBUF-NEXT: v_readlane_b32 s51, v1, 12
-; MUBUF-NEXT: v_readlane_b32 s50, v1, 11
-; MUBUF-NEXT: v_readlane_b32 s49, v1, 10
-; MUBUF-NEXT: v_readlane_b32 s48, v1, 9
-; MUBUF-NEXT: v_readlane_b32 s47, v1, 8
-; MUBUF-NEXT: v_readlane_b32 s46, v1, 7
-; MUBUF-NEXT: v_readlane_b32 s45, v1, 6
-; MUBUF-NEXT: v_readlane_b32 s44, v1, 5
-; MUBUF-NEXT: v_readlane_b32 s43, v1, 4
-; MUBUF-NEXT: v_readlane_b32 s42, v1, 3
-; MUBUF-NEXT: v_readlane_b32 s41, v1, 2
-; MUBUF-NEXT: v_readlane_b32 s40, v1, 1
-; MUBUF-NEXT: v_readlane_b32 s39, v1, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v1, 32
+; MUBUF-NEXT: v_readlane_b32 s101, v1, 31
+; MUBUF-NEXT: v_readlane_b32 s100, v1, 30
+; MUBUF-NEXT: v_readlane_b32 s99, v1, 29
+; MUBUF-NEXT: v_readlane_b32 s98, v1, 28
+; MUBUF-NEXT: v_readlane_b32 s97, v1, 27
+; MUBUF-NEXT: v_readlane_b32 s96, v1, 26
+; MUBUF-NEXT: v_readlane_b32 s95, v1, 25
+; MUBUF-NEXT: v_readlane_b32 s94, v1, 24
+; MUBUF-NEXT: v_readlane_b32 s85, v1, 23
+; MUBUF-NEXT: v_readlane_b32 s84, v1, 22
+; MUBUF-NEXT: v_readlane_b32 s83, v1, 21
+; MUBUF-NEXT: v_readlane_b32 s82, v1, 20
+; MUBUF-NEXT: v_readlane_b32 s81, v1, 19
+; MUBUF-NEXT: v_readlane_b32 s80, v1, 18
+; MUBUF-NEXT: v_readlane_b32 s79, v1, 17
+; MUBUF-NEXT: v_readlane_b32 s78, v1, 16
+; MUBUF-NEXT: v_readlane_b32 s69, v1, 15
+; MUBUF-NEXT: v_readlane_b32 s68, v1, 14
+; MUBUF-NEXT: v_readlane_b32 s67, v1, 13
+; MUBUF-NEXT: v_readlane_b32 s66, v1, 12
+; MUBUF-NEXT: v_readlane_b32 s65, v1, 11
+; MUBUF-NEXT: v_readlane_b32 s64, v1, 10
+; MUBUF-NEXT: v_readlane_b32 s63, v1, 9
+; MUBUF-NEXT: v_readlane_b32 s62, v1, 8
+; MUBUF-NEXT: v_readlane_b32 s53, v1, 7
+; MUBUF-NEXT: v_readlane_b32 s52, v1, 6
+; MUBUF-NEXT: v_readlane_b32 s51, v1, 5
+; MUBUF-NEXT: v_readlane_b32 s50, v1, 4
+; MUBUF-NEXT: v_readlane_b32 s49, v1, 3
+; MUBUF-NEXT: v_readlane_b32 s48, v1, 2
+; MUBUF-NEXT: v_readlane_b32 s47, v1, 1
+; MUBUF-NEXT: v_readlane_b32 s46, v1, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
@@ -1140,145 +836,83 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v1, s39, 0
-; FLATSCR-NEXT: v_writelane_b32 v1, s40, 1
-; FLATSCR-NEXT: v_writelane_b32 v1, s41, 2
-; FLATSCR-NEXT: v_writelane_b32 v1, s42, 3
-; FLATSCR-NEXT: v_writelane_b32 v1, s43, 4
-; FLATSCR-NEXT: v_writelane_b32 v1, s44, 5
-; FLATSCR-NEXT: v_writelane_b32 v1, s45, 6
-; FLATSCR-NEXT: v_writelane_b32 v1, s46, 7
-; FLATSCR-NEXT: v_writelane_b32 v1, s47, 8
-; FLATSCR-NEXT: v_writelane_b32 v1, s48, 9
-; FLATSCR-NEXT: v_writelane_b32 v1, s49, 10
-; FLATSCR-NEXT: v_writelane_b32 v1, s50, 11
-; FLATSCR-NEXT: v_writelane_b32 v1, s51, 12
-; FLATSCR-NEXT: v_writelane_b32 v1, s52, 13
-; FLATSCR-NEXT: v_writelane_b32 v1, s53, 14
-; FLATSCR-NEXT: v_writelane_b32 v1, s54, 15
-; FLATSCR-NEXT: v_writelane_b32 v1, s55, 16
-; FLATSCR-NEXT: v_writelane_b32 v1, s56, 17
-; FLATSCR-NEXT: v_writelane_b32 v1, s57, 18
-; FLATSCR-NEXT: v_writelane_b32 v1, s58, 19
-; FLATSCR-NEXT: v_writelane_b32 v1, s59, 20
-; FLATSCR-NEXT: v_writelane_b32 v1, s60, 21
-; FLATSCR-NEXT: v_writelane_b32 v1, s61, 22
-; FLATSCR-NEXT: v_writelane_b32 v1, s62, 23
-; FLATSCR-NEXT: v_writelane_b32 v1, s63, 24
-; FLATSCR-NEXT: v_writelane_b32 v1, s64, 25
-; FLATSCR-NEXT: v_writelane_b32 v1, s65, 26
-; FLATSCR-NEXT: v_writelane_b32 v1, s66, 27
-; FLATSCR-NEXT: v_writelane_b32 v1, s67, 28
-; FLATSCR-NEXT: v_writelane_b32 v1, s68, 29
-; FLATSCR-NEXT: v_writelane_b32 v1, s69, 30
-; FLATSCR-NEXT: v_writelane_b32 v1, s70, 31
-; FLATSCR-NEXT: v_writelane_b32 v1, s71, 32
-; FLATSCR-NEXT: v_writelane_b32 v1, s72, 33
-; FLATSCR-NEXT: v_writelane_b32 v1, s73, 34
-; FLATSCR-NEXT: v_writelane_b32 v1, s74, 35
-; FLATSCR-NEXT: v_writelane_b32 v1, s75, 36
-; FLATSCR-NEXT: v_writelane_b32 v1, s76, 37
-; FLATSCR-NEXT: v_writelane_b32 v1, s77, 38
-; FLATSCR-NEXT: v_writelane_b32 v1, s78, 39
-; FLATSCR-NEXT: v_writelane_b32 v1, s79, 40
-; FLATSCR-NEXT: v_writelane_b32 v1, s80, 41
-; FLATSCR-NEXT: v_writelane_b32 v1, s81, 42
-; FLATSCR-NEXT: v_writelane_b32 v1, s82, 43
-; FLATSCR-NEXT: v_writelane_b32 v1, s83, 44
-; FLATSCR-NEXT: v_writelane_b32 v1, s84, 45
-; FLATSCR-NEXT: v_writelane_b32 v1, s85, 46
-; FLATSCR-NEXT: v_writelane_b32 v1, s86, 47
-; FLATSCR-NEXT: v_writelane_b32 v1, s87, 48
-; FLATSCR-NEXT: v_writelane_b32 v1, s88, 49
-; FLATSCR-NEXT: v_writelane_b32 v1, s89, 50
-; FLATSCR-NEXT: v_writelane_b32 v1, s90, 51
-; FLATSCR-NEXT: v_writelane_b32 v1, s91, 52
-; FLATSCR-NEXT: v_writelane_b32 v1, s92, 53
-; FLATSCR-NEXT: v_writelane_b32 v1, s93, 54
-; FLATSCR-NEXT: v_writelane_b32 v1, s94, 55
-; FLATSCR-NEXT: v_writelane_b32 v1, s95, 56
-; FLATSCR-NEXT: v_writelane_b32 v1, s96, 57
-; FLATSCR-NEXT: v_writelane_b32 v1, s97, 58
-; FLATSCR-NEXT: v_writelane_b32 v1, s98, 59
-; FLATSCR-NEXT: v_writelane_b32 v1, s99, 60
-; FLATSCR-NEXT: v_writelane_b32 v1, s100, 61
+; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0
+; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1
+; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2
+; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3
+; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4
+; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5
+; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6
+; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7
+; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8
+; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9
+; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10
+; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11
+; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12
+; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13
+; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14
+; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15
+; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16
+; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17
+; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18
+; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19
+; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20
+; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21
+; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22
+; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23
+; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24
+; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25
+; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26
+; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27
+; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28
+; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29
+; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: v_writelane_b32 v1, s101, 62
+; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31
; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber v41
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_writelane_b32 v1, s102, 63
+; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
-; FLATSCR-NEXT: v_readlane_b32 s102, v1, 63
-; FLATSCR-NEXT: v_readlane_b32 s101, v1, 62
-; FLATSCR-NEXT: v_readlane_b32 s100, v1, 61
-; FLATSCR-NEXT: v_readlane_b32 s99, v1, 60
-; FLATSCR-NEXT: v_readlane_b32 s98, v1, 59
-; FLATSCR-NEXT: v_readlane_b32 s97, v1, 58
-; FLATSCR-NEXT: v_readlane_b32 s96, v1, 57
-; FLATSCR-NEXT: v_readlane_b32 s95, v1, 56
-; FLATSCR-NEXT: v_readlane_b32 s94, v1, 55
-; FLATSCR-NEXT: v_readlane_b32 s93, v1, 54
-; FLATSCR-NEXT: v_readlane_b32 s92, v1, 53
-; FLATSCR-NEXT: v_readlane_b32 s91, v1, 52
-; FLATSCR-NEXT: v_readlane_b32 s90, v1, 51
-; FLATSCR-NEXT: v_readlane_b32 s89, v1, 50
-; FLATSCR-NEXT: v_readlane_b32 s88, v1, 49
-; FLATSCR-NEXT: v_readlane_b32 s87, v1, 48
-; FLATSCR-NEXT: v_readlane_b32 s86, v1, 47
-; FLATSCR-NEXT: v_readlane_b32 s85, v1, 46
-; FLATSCR-NEXT: v_readlane_b32 s84, v1, 45
-; FLATSCR-NEXT: v_readlane_b32 s83, v1, 44
-; FLATSCR-NEXT: v_readlane_b32 s82, v1, 43
-; FLATSCR-NEXT: v_readlane_b32 s81, v1, 42
-; FLATSCR-NEXT: v_readlane_b32 s80, v1, 41
-; FLATSCR-NEXT: v_readlane_b32 s79, v1, 40
-; FLATSCR-NEXT: v_readlane_b32 s78, v1, 39
-; FLATSCR-NEXT: v_readlane_b32 s77, v1, 38
-; FLATSCR-NEXT: v_readlane_b32 s76, v1, 37
-; FLATSCR-NEXT: v_readlane_b32 s75, v1, 36
-; FLATSCR-NEXT: v_readlane_b32 s74, v1, 35
-; FLATSCR-NEXT: v_readlane_b32 s73, v1, 34
-; FLATSCR-NEXT: v_readlane_b32 s72, v1, 33
-; FLATSCR-NEXT: v_readlane_b32 s71, v1, 32
-; FLATSCR-NEXT: v_readlane_b32 s70, v1, 31
-; FLATSCR-NEXT: v_readlane_b32 s69, v1, 30
-; FLATSCR-NEXT: v_readlane_b32 s68, v1, 29
-; FLATSCR-NEXT: v_readlane_b32 s67, v1, 28
-; FLATSCR-NEXT: v_readlane_b32 s66, v1, 27
-; FLATSCR-NEXT: v_readlane_b32 s65, v1, 26
-; FLATSCR-NEXT: v_readlane_b32 s64, v1, 25
-; FLATSCR-NEXT: v_readlane_b32 s63, v1, 24
-; FLATSCR-NEXT: v_readlane_b32 s62, v1, 23
-; FLATSCR-NEXT: v_readlane_b32 s61, v1, 22
-; FLATSCR-NEXT: v_readlane_b32 s60, v1, 21
-; FLATSCR-NEXT: v_readlane_b32 s59, v1, 20
-; FLATSCR-NEXT: v_readlane_b32 s58, v1, 19
-; FLATSCR-NEXT: v_readlane_b32 s57, v1, 18
-; FLATSCR-NEXT: v_readlane_b32 s56, v1, 17
-; FLATSCR-NEXT: v_readlane_b32 s55, v1, 16
-; FLATSCR-NEXT: v_readlane_b32 s54, v1, 15
-; FLATSCR-NEXT: v_readlane_b32 s53, v1, 14
-; FLATSCR-NEXT: v_readlane_b32 s52, v1, 13
-; FLATSCR-NEXT: v_readlane_b32 s51, v1, 12
-; FLATSCR-NEXT: v_readlane_b32 s50, v1, 11
-; FLATSCR-NEXT: v_readlane_b32 s49, v1, 10
-; FLATSCR-NEXT: v_readlane_b32 s48, v1, 9
-; FLATSCR-NEXT: v_readlane_b32 s47, v1, 8
-; FLATSCR-NEXT: v_readlane_b32 s46, v1, 7
-; FLATSCR-NEXT: v_readlane_b32 s45, v1, 6
-; FLATSCR-NEXT: v_readlane_b32 s44, v1, 5
-; FLATSCR-NEXT: v_readlane_b32 s43, v1, 4
-; FLATSCR-NEXT: v_readlane_b32 s42, v1, 3
-; FLATSCR-NEXT: v_readlane_b32 s41, v1, 2
-; FLATSCR-NEXT: v_readlane_b32 s40, v1, 1
-; FLATSCR-NEXT: v_readlane_b32 s39, v1, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32
+; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31
+; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30
+; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29
+; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28
+; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27
+; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26
+; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25
+; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24
+; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23
+; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22
+; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21
+; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20
+; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19
+; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18
+; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17
+; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16
+; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15
+; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14
+; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13
+; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12
+; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11
+; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10
+; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9
+; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8
+; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7
+; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6
+; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5
+; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4
+; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3
+; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2
+; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1
+; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload
@@ -1346,7 +980,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 vcc_lo, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -1365,14 +999,14 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, vcc_lo
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_mov_b32 vcc_lo, s33
+; FLATSCR-NEXT: s_mov_b32 s38, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill
@@ -1391,7 +1025,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_mov_b32 s33, vcc_lo
+; FLATSCR-NEXT: s_mov_b32 s33, s38
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@@ -1412,7 +1046,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 vcc_lo, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -1434,14 +1068,14 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, vcc_lo
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_mov_b32 vcc_lo, s33
+; FLATSCR-NEXT: s_mov_b32 s38, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill
@@ -1463,7 +1097,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_mov_b32 s33, vcc_lo
+; FLATSCR-NEXT: s_mov_b32 s33, s38
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@@ -1491,7 +1125,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; MUBUF-LABEL: scratch_reg_needed_mubuf_offset:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 vcc_lo, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
@@ -1517,14 +1151,14 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s6 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, vcc_lo
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: scratch_reg_needed_mubuf_offset:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_mov_b32 vcc_lo, s33
+; FLATSCR-NEXT: s_mov_b32 s38, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004
@@ -1550,7 +1184,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004
; FLATSCR-NEXT: scratch_load_dword v40, off, s2 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_mov_b32 s33, vcc_lo
+; FLATSCR-NEXT: s_mov_b32 s33, s38
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@@ -1650,22 +1284,15 @@ define void @callee_need_to_spill_fp_to_memory() #3 {
; MUBUF-LABEL: callee_need_to_spill_fp_to_memory:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: v_mov_b32_e32 v0, s4
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber nonpreserved SGPRs
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_addk_i32 s32, 0x200
-; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: v_readfirstlane_b32 s4, v0
-; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: callee_need_to_spill_fp_to_memory:
@@ -1702,156 +1329,89 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
; MUBUF-LABEL: callee_need_to_spill_fp_to_memory_full_reserved_vgpr:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v39, s39, 0
-; MUBUF-NEXT: v_writelane_b32 v39, s40, 1
-; MUBUF-NEXT: v_writelane_b32 v39, s41, 2
-; MUBUF-NEXT: v_writelane_b32 v39, s42, 3
-; MUBUF-NEXT: v_writelane_b32 v39, s43, 4
-; MUBUF-NEXT: v_writelane_b32 v39, s44, 5
-; MUBUF-NEXT: v_writelane_b32 v39, s45, 6
-; MUBUF-NEXT: v_writelane_b32 v39, s46, 7
-; MUBUF-NEXT: v_writelane_b32 v39, s47, 8
-; MUBUF-NEXT: v_writelane_b32 v39, s48, 9
-; MUBUF-NEXT: v_writelane_b32 v39, s49, 10
-; MUBUF-NEXT: v_writelane_b32 v39, s50, 11
-; MUBUF-NEXT: v_writelane_b32 v39, s51, 12
-; MUBUF-NEXT: v_writelane_b32 v39, s52, 13
-; MUBUF-NEXT: v_writelane_b32 v39, s53, 14
-; MUBUF-NEXT: v_writelane_b32 v39, s54, 15
-; MUBUF-NEXT: v_writelane_b32 v39, s55, 16
-; MUBUF-NEXT: v_writelane_b32 v39, s56, 17
-; MUBUF-NEXT: v_writelane_b32 v39, s57, 18
-; MUBUF-NEXT: v_writelane_b32 v39, s58, 19
-; MUBUF-NEXT: v_writelane_b32 v39, s59, 20
-; MUBUF-NEXT: v_writelane_b32 v39, s60, 21
-; MUBUF-NEXT: v_writelane_b32 v39, s61, 22
-; MUBUF-NEXT: v_writelane_b32 v39, s62, 23
-; MUBUF-NEXT: v_writelane_b32 v39, s63, 24
-; MUBUF-NEXT: v_writelane_b32 v39, s64, 25
-; MUBUF-NEXT: v_writelane_b32 v39, s65, 26
-; MUBUF-NEXT: v_writelane_b32 v39, s66, 27
-; MUBUF-NEXT: v_writelane_b32 v39, s67, 28
-; MUBUF-NEXT: v_writelane_b32 v39, s68, 29
-; MUBUF-NEXT: v_writelane_b32 v39, s69, 30
-; MUBUF-NEXT: v_writelane_b32 v39, s70, 31
-; MUBUF-NEXT: v_writelane_b32 v39, s71, 32
-; MUBUF-NEXT: v_writelane_b32 v39, s72, 33
-; MUBUF-NEXT: v_writelane_b32 v39, s73, 34
-; MUBUF-NEXT: v_writelane_b32 v39, s74, 35
-; MUBUF-NEXT: v_writelane_b32 v39, s75, 36
-; MUBUF-NEXT: v_writelane_b32 v39, s76, 37
-; MUBUF-NEXT: v_writelane_b32 v39, s77, 38
-; MUBUF-NEXT: v_writelane_b32 v39, s78, 39
-; MUBUF-NEXT: v_writelane_b32 v39, s79, 40
-; MUBUF-NEXT: v_writelane_b32 v39, s80, 41
-; MUBUF-NEXT: v_writelane_b32 v39, s81, 42
-; MUBUF-NEXT: v_writelane_b32 v39, s82, 43
-; MUBUF-NEXT: v_writelane_b32 v39, s83, 44
-; MUBUF-NEXT: v_writelane_b32 v39, s84, 45
-; MUBUF-NEXT: v_writelane_b32 v39, s85, 46
-; MUBUF-NEXT: v_writelane_b32 v39, s86, 47
-; MUBUF-NEXT: v_writelane_b32 v39, s87, 48
-; MUBUF-NEXT: v_writelane_b32 v39, s88, 49
-; MUBUF-NEXT: v_writelane_b32 v39, s89, 50
-; MUBUF-NEXT: v_writelane_b32 v39, s90, 51
-; MUBUF-NEXT: v_writelane_b32 v39, s91, 52
-; MUBUF-NEXT: v_writelane_b32 v39, s92, 53
-; MUBUF-NEXT: v_writelane_b32 v39, s93, 54
-; MUBUF-NEXT: v_writelane_b32 v39, s94, 55
-; MUBUF-NEXT: v_writelane_b32 v39, s95, 56
-; MUBUF-NEXT: v_writelane_b32 v39, s96, 57
-; MUBUF-NEXT: v_writelane_b32 v39, s97, 58
-; MUBUF-NEXT: v_writelane_b32 v39, s98, 59
-; MUBUF-NEXT: v_writelane_b32 v39, s99, 60
-; MUBUF-NEXT: v_writelane_b32 v39, s100, 61
-; MUBUF-NEXT: v_mov_b32_e32 v0, s4
-; MUBUF-NEXT: v_writelane_b32 v39, s101, 62
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v39, s102, 63
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: v_writelane_b32 v39, s46, 0
+; MUBUF-NEXT: v_writelane_b32 v39, s47, 1
+; MUBUF-NEXT: v_writelane_b32 v39, s48, 2
+; MUBUF-NEXT: v_writelane_b32 v39, s49, 3
+; MUBUF-NEXT: v_writelane_b32 v39, s50, 4
+; MUBUF-NEXT: v_writelane_b32 v39, s51, 5
+; MUBUF-NEXT: v_writelane_b32 v39, s52, 6
+; MUBUF-NEXT: v_writelane_b32 v39, s53, 7
+; MUBUF-NEXT: v_writelane_b32 v39, s62, 8
+; MUBUF-NEXT: v_writelane_b32 v39, s63, 9
+; MUBUF-NEXT: v_writelane_b32 v39, s64, 10
+; MUBUF-NEXT: v_writelane_b32 v39, s65, 11
+; MUBUF-NEXT: v_writelane_b32 v39, s66, 12
+; MUBUF-NEXT: v_writelane_b32 v39, s67, 13
+; MUBUF-NEXT: v_writelane_b32 v39, s68, 14
+; MUBUF-NEXT: v_writelane_b32 v39, s69, 15
+; MUBUF-NEXT: v_writelane_b32 v39, s78, 16
+; MUBUF-NEXT: v_writelane_b32 v39, s79, 17
+; MUBUF-NEXT: v_writelane_b32 v39, s80, 18
+; MUBUF-NEXT: v_writelane_b32 v39, s81, 19
+; MUBUF-NEXT: v_writelane_b32 v39, s82, 20
+; MUBUF-NEXT: v_writelane_b32 v39, s83, 21
+; MUBUF-NEXT: v_writelane_b32 v39, s84, 22
+; MUBUF-NEXT: v_writelane_b32 v39, s85, 23
+; MUBUF-NEXT: v_writelane_b32 v39, s94, 24
+; MUBUF-NEXT: v_writelane_b32 v39, s95, 25
+; MUBUF-NEXT: v_writelane_b32 v39, s96, 26
+; MUBUF-NEXT: v_writelane_b32 v39, s97, 27
+; MUBUF-NEXT: v_writelane_b32 v39, s98, 28
+; MUBUF-NEXT: v_writelane_b32 v39, s99, 29
+; MUBUF-NEXT: v_writelane_b32 v39, s100, 30
+; MUBUF-NEXT: v_writelane_b32 v39, s101, 31
+; MUBUF-NEXT: s_addk_i32 s32, 0x200
+; MUBUF-NEXT: v_writelane_b32 v39, s102, 32
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs except CSR v40
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_addk_i32 s32, 0x300
-; MUBUF-NEXT: v_readlane_b32 s102, v39, 63
-; MUBUF-NEXT: v_readlane_b32 s101, v39, 62
-; MUBUF-NEXT: v_readlane_b32 s100, v39, 61
-; MUBUF-NEXT: v_readlane_b32 s99, v39, 60
-; MUBUF-NEXT: v_readlane_b32 s98, v39, 59
-; MUBUF-NEXT: v_readlane_b32 s97, v39, 58
-; MUBUF-NEXT: v_readlane_b32 s96, v39, 57
-; MUBUF-NEXT: v_readlane_b32 s95, v39, 56
-; MUBUF-NEXT: v_readlane_b32 s94, v39, 55
-; MUBUF-NEXT: v_readlane_b32 s93, v39, 54
-; MUBUF-NEXT: v_readlane_b32 s92, v39, 53
-; MUBUF-NEXT: v_readlane_b32 s91, v39, 52
-; MUBUF-NEXT: v_readlane_b32 s90, v39, 51
-; MUBUF-NEXT: v_readlane_b32 s89, v39, 50
-; MUBUF-NEXT: v_readlane_b32 s88, v39, 49
-; MUBUF-NEXT: v_readlane_b32 s87, v39, 48
-; MUBUF-NEXT: v_readlane_b32 s86, v39, 47
-; MUBUF-NEXT: v_readlane_b32 s85, v39, 46
-; MUBUF-NEXT: v_readlane_b32 s84, v39, 45
-; MUBUF-NEXT: v_readlane_b32 s83, v39, 44
-; MUBUF-NEXT: v_readlane_b32 s82, v39, 43
-; MUBUF-NEXT: v_readlane_b32 s81, v39, 42
-; MUBUF-NEXT: v_readlane_b32 s80, v39, 41
-; MUBUF-NEXT: v_readlane_b32 s79, v39, 40
-; MUBUF-NEXT: v_readlane_b32 s78, v39, 39
-; MUBUF-NEXT: v_readlane_b32 s77, v39, 38
-; MUBUF-NEXT: v_readlane_b32 s76, v39, 37
-; MUBUF-NEXT: v_readlane_b32 s75, v39, 36
-; MUBUF-NEXT: v_readlane_b32 s74, v39, 35
-; MUBUF-NEXT: v_readlane_b32 s73, v39, 34
-; MUBUF-NEXT: v_readlane_b32 s72, v39, 33
-; MUBUF-NEXT: v_readlane_b32 s71, v39, 32
-; MUBUF-NEXT: v_readlane_b32 s70, v39, 31
-; MUBUF-NEXT: v_readlane_b32 s69, v39, 30
-; MUBUF-NEXT: v_readlane_b32 s68, v39, 29
-; MUBUF-NEXT: v_readlane_b32 s67, v39, 28
-; MUBUF-NEXT: v_readlane_b32 s66, v39, 27
-; MUBUF-NEXT: v_readlane_b32 s65, v39, 26
-; MUBUF-NEXT: v_readlane_b32 s64, v39, 25
-; MUBUF-NEXT: v_readlane_b32 s63, v39, 24
-; MUBUF-NEXT: v_readlane_b32 s62, v39, 23
-; MUBUF-NEXT: v_readlane_b32 s61, v39, 22
-; MUBUF-NEXT: v_readlane_b32 s60, v39, 21
-; MUBUF-NEXT: v_readlane_b32 s59, v39, 20
-; MUBUF-NEXT: v_readlane_b32 s58, v39, 19
-; MUBUF-NEXT: v_readlane_b32 s57, v39, 18
-; MUBUF-NEXT: v_readlane_b32 s56, v39, 17
-; MUBUF-NEXT: v_readlane_b32 s55, v39, 16
-; MUBUF-NEXT: v_readlane_b32 s54, v39, 15
-; MUBUF-NEXT: v_readlane_b32 s53, v39, 14
-; MUBUF-NEXT: v_readlane_b32 s52, v39, 13
-; MUBUF-NEXT: v_readlane_b32 s51, v39, 12
-; MUBUF-NEXT: v_readlane_b32 s50, v39, 11
-; MUBUF-NEXT: v_readlane_b32 s49, v39, 10
-; MUBUF-NEXT: v_readlane_b32 s48, v39, 9
-; MUBUF-NEXT: v_readlane_b32 s47, v39, 8
-; MUBUF-NEXT: v_readlane_b32 s46, v39, 7
-; MUBUF-NEXT: v_readlane_b32 s45, v39, 6
-; MUBUF-NEXT: v_readlane_b32 s44, v39, 5
-; MUBUF-NEXT: v_readlane_b32 s43, v39, 4
-; MUBUF-NEXT: v_readlane_b32 s42, v39, 3
-; MUBUF-NEXT: v_readlane_b32 s41, v39, 2
-; MUBUF-NEXT: v_readlane_b32 s40, v39, 1
-; MUBUF-NEXT: v_readlane_b32 s39, v39, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v39, 32
+; MUBUF-NEXT: v_readlane_b32 s101, v39, 31
+; MUBUF-NEXT: v_readlane_b32 s100, v39, 30
+; MUBUF-NEXT: v_readlane_b32 s99, v39, 29
+; MUBUF-NEXT: v_readlane_b32 s98, v39, 28
+; MUBUF-NEXT: v_readlane_b32 s97, v39, 27
+; MUBUF-NEXT: v_readlane_b32 s96, v39, 26
+; MUBUF-NEXT: v_readlane_b32 s95, v39, 25
+; MUBUF-NEXT: v_readlane_b32 s94, v39, 24
+; MUBUF-NEXT: v_readlane_b32 s85, v39, 23
+; MUBUF-NEXT: v_readlane_b32 s84, v39, 22
+; MUBUF-NEXT: v_readlane_b32 s83, v39, 21
+; MUBUF-NEXT: v_readlane_b32 s82, v39, 20
+; MUBUF-NEXT: v_readlane_b32 s81, v39, 19
+; MUBUF-NEXT: v_readlane_b32 s80, v39, 18
+; MUBUF-NEXT: v_readlane_b32 s79, v39, 17
+; MUBUF-NEXT: v_readlane_b32 s78, v39, 16
+; MUBUF-NEXT: v_readlane_b32 s69, v39, 15
+; MUBUF-NEXT: v_readlane_b32 s68, v39, 14
+; MUBUF-NEXT: v_readlane_b32 s67, v39, 13
+; MUBUF-NEXT: v_readlane_b32 s66, v39, 12
+; MUBUF-NEXT: v_readlane_b32 s65, v39, 11
+; MUBUF-NEXT: v_readlane_b32 s64, v39, 10
+; MUBUF-NEXT: v_readlane_b32 s63, v39, 9
+; MUBUF-NEXT: v_readlane_b32 s62, v39, 8
+; MUBUF-NEXT: v_readlane_b32 s53, v39, 7
+; MUBUF-NEXT: v_readlane_b32 s52, v39, 6
+; MUBUF-NEXT: v_readlane_b32 s51, v39, 5
+; MUBUF-NEXT: v_readlane_b32 s50, v39, 4
+; MUBUF-NEXT: v_readlane_b32 s49, v39, 3
+; MUBUF-NEXT: v_readlane_b32 s48, v39, 2
+; MUBUF-NEXT: v_readlane_b32 s47, v39, 1
+; MUBUF-NEXT: v_readlane_b32 s46, v39, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: v_readfirstlane_b32 s4, v0
-; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@@ -1863,141 +1423,79 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v39, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0
-; FLATSCR-NEXT: v_writelane_b32 v39, s40, 1
-; FLATSCR-NEXT: v_writelane_b32 v39, s41, 2
-; FLATSCR-NEXT: v_writelane_b32 v39, s42, 3
-; FLATSCR-NEXT: v_writelane_b32 v39, s43, 4
-; FLATSCR-NEXT: v_writelane_b32 v39, s44, 5
-; FLATSCR-NEXT: v_writelane_b32 v39, s45, 6
-; FLATSCR-NEXT: v_writelane_b32 v39, s46, 7
-; FLATSCR-NEXT: v_writelane_b32 v39, s47, 8
-; FLATSCR-NEXT: v_writelane_b32 v39, s48, 9
-; FLATSCR-NEXT: v_writelane_b32 v39, s49, 10
-; FLATSCR-NEXT: v_writelane_b32 v39, s50, 11
-; FLATSCR-NEXT: v_writelane_b32 v39, s51, 12
-; FLATSCR-NEXT: v_writelane_b32 v39, s52, 13
-; FLATSCR-NEXT: v_writelane_b32 v39, s53, 14
-; FLATSCR-NEXT: v_writelane_b32 v39, s54, 15
-; FLATSCR-NEXT: v_writelane_b32 v39, s55, 16
-; FLATSCR-NEXT: v_writelane_b32 v39, s56, 17
-; FLATSCR-NEXT: v_writelane_b32 v39, s57, 18
-; FLATSCR-NEXT: v_writelane_b32 v39, s58, 19
-; FLATSCR-NEXT: v_writelane_b32 v39, s59, 20
-; FLATSCR-NEXT: v_writelane_b32 v39, s60, 21
-; FLATSCR-NEXT: v_writelane_b32 v39, s61, 22
-; FLATSCR-NEXT: v_writelane_b32 v39, s62, 23
-; FLATSCR-NEXT: v_writelane_b32 v39, s63, 24
-; FLATSCR-NEXT: v_writelane_b32 v39, s64, 25
-; FLATSCR-NEXT: v_writelane_b32 v39, s65, 26
-; FLATSCR-NEXT: v_writelane_b32 v39, s66, 27
-; FLATSCR-NEXT: v_writelane_b32 v39, s67, 28
-; FLATSCR-NEXT: v_writelane_b32 v39, s68, 29
-; FLATSCR-NEXT: v_writelane_b32 v39, s69, 30
-; FLATSCR-NEXT: v_writelane_b32 v39, s70, 31
-; FLATSCR-NEXT: v_writelane_b32 v39, s71, 32
-; FLATSCR-NEXT: v_writelane_b32 v39, s72, 33
-; FLATSCR-NEXT: v_writelane_b32 v39, s73, 34
-; FLATSCR-NEXT: v_writelane_b32 v39, s74, 35
-; FLATSCR-NEXT: v_writelane_b32 v39, s75, 36
-; FLATSCR-NEXT: v_writelane_b32 v39, s76, 37
-; FLATSCR-NEXT: v_writelane_b32 v39, s77, 38
-; FLATSCR-NEXT: v_writelane_b32 v39, s78, 39
-; FLATSCR-NEXT: v_writelane_b32 v39, s79, 40
-; FLATSCR-NEXT: v_writelane_b32 v39, s80, 41
-; FLATSCR-NEXT: v_writelane_b32 v39, s81, 42
-; FLATSCR-NEXT: v_writelane_b32 v39, s82, 43
-; FLATSCR-NEXT: v_writelane_b32 v39, s83, 44
-; FLATSCR-NEXT: v_writelane_b32 v39, s84, 45
-; FLATSCR-NEXT: v_writelane_b32 v39, s85, 46
-; FLATSCR-NEXT: v_writelane_b32 v39, s86, 47
-; FLATSCR-NEXT: v_writelane_b32 v39, s87, 48
-; FLATSCR-NEXT: v_writelane_b32 v39, s88, 49
-; FLATSCR-NEXT: v_writelane_b32 v39, s89, 50
-; FLATSCR-NEXT: v_writelane_b32 v39, s90, 51
-; FLATSCR-NEXT: v_writelane_b32 v39, s91, 52
-; FLATSCR-NEXT: v_writelane_b32 v39, s92, 53
-; FLATSCR-NEXT: v_writelane_b32 v39, s93, 54
-; FLATSCR-NEXT: v_writelane_b32 v39, s94, 55
-; FLATSCR-NEXT: v_writelane_b32 v39, s95, 56
-; FLATSCR-NEXT: v_writelane_b32 v39, s96, 57
-; FLATSCR-NEXT: v_writelane_b32 v39, s97, 58
-; FLATSCR-NEXT: v_writelane_b32 v39, s98, 59
-; FLATSCR-NEXT: v_writelane_b32 v39, s99, 60
-; FLATSCR-NEXT: v_writelane_b32 v39, s100, 61
-; FLATSCR-NEXT: v_writelane_b32 v39, s101, 62
+; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0
+; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1
+; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2
+; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3
+; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4
+; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5
+; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6
+; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7
+; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8
+; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9
+; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10
+; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11
+; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12
+; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13
+; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14
+; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15
+; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16
+; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17
+; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18
+; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19
+; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20
+; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21
+; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22
+; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23
+; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24
+; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25
+; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26
+; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27
+; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28
+; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29
+; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30
+; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
-; FLATSCR-NEXT: v_writelane_b32 v39, s102, 63
+; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s102, v39, 63
-; FLATSCR-NEXT: v_readlane_b32 s101, v39, 62
-; FLATSCR-NEXT: v_readlane_b32 s100, v39, 61
-; FLATSCR-NEXT: v_readlane_b32 s99, v39, 60
-; FLATSCR-NEXT: v_readlane_b32 s98, v39, 59
-; FLATSCR-NEXT: v_readlane_b32 s97, v39, 58
-; FLATSCR-NEXT: v_readlane_b32 s96, v39, 57
-; FLATSCR-NEXT: v_readlane_b32 s95, v39, 56
-; FLATSCR-NEXT: v_readlane_b32 s94, v39, 55
-; FLATSCR-NEXT: v_readlane_b32 s93, v39, 54
-; FLATSCR-NEXT: v_readlane_b32 s92, v39, 53
-; FLATSCR-NEXT: v_readlane_b32 s91, v39, 52
-; FLATSCR-NEXT: v_readlane_b32 s90, v39, 51
-; FLATSCR-NEXT: v_readlane_b32 s89, v39, 50
-; FLATSCR-NEXT: v_readlane_b32 s88, v39, 49
-; FLATSCR-NEXT: v_readlane_b32 s87, v39, 48
-; FLATSCR-NEXT: v_readlane_b32 s86, v39, 47
-; FLATSCR-NEXT: v_readlane_b32 s85, v39, 46
-; FLATSCR-NEXT: v_readlane_b32 s84, v39, 45
-; FLATSCR-NEXT: v_readlane_b32 s83, v39, 44
-; FLATSCR-NEXT: v_readlane_b32 s82, v39, 43
-; FLATSCR-NEXT: v_readlane_b32 s81, v39, 42
-; FLATSCR-NEXT: v_readlane_b32 s80, v39, 41
-; FLATSCR-NEXT: v_readlane_b32 s79, v39, 40
-; FLATSCR-NEXT: v_readlane_b32 s78, v39, 39
-; FLATSCR-NEXT: v_readlane_b32 s77, v39, 38
-; FLATSCR-NEXT: v_readlane_b32 s76, v39, 37
-; FLATSCR-NEXT: v_readlane_b32 s75, v39, 36
-; FLATSCR-NEXT: v_readlane_b32 s74, v39, 35
-; FLATSCR-NEXT: v_readlane_b32 s73, v39, 34
-; FLATSCR-NEXT: v_readlane_b32 s72, v39, 33
-; FLATSCR-NEXT: v_readlane_b32 s71, v39, 32
-; FLATSCR-NEXT: v_readlane_b32 s70, v39, 31
-; FLATSCR-NEXT: v_readlane_b32 s69, v39, 30
-; FLATSCR-NEXT: v_readlane_b32 s68, v39, 29
-; FLATSCR-NEXT: v_readlane_b32 s67, v39, 28
-; FLATSCR-NEXT: v_readlane_b32 s66, v39, 27
-; FLATSCR-NEXT: v_readlane_b32 s65, v39, 26
-; FLATSCR-NEXT: v_readlane_b32 s64, v39, 25
-; FLATSCR-NEXT: v_readlane_b32 s63, v39, 24
-; FLATSCR-NEXT: v_readlane_b32 s62, v39, 23
-; FLATSCR-NEXT: v_readlane_b32 s61, v39, 22
-; FLATSCR-NEXT: v_readlane_b32 s60, v39, 21
-; FLATSCR-NEXT: v_readlane_b32 s59, v39, 20
-; FLATSCR-NEXT: v_readlane_b32 s58, v39, 19
-; FLATSCR-NEXT: v_readlane_b32 s57, v39, 18
-; FLATSCR-NEXT: v_readlane_b32 s56, v39, 17
-; FLATSCR-NEXT: v_readlane_b32 s55, v39, 16
-; FLATSCR-NEXT: v_readlane_b32 s54, v39, 15
-; FLATSCR-NEXT: v_readlane_b32 s53, v39, 14
-; FLATSCR-NEXT: v_readlane_b32 s52, v39, 13
-; FLATSCR-NEXT: v_readlane_b32 s51, v39, 12
-; FLATSCR-NEXT: v_readlane_b32 s50, v39, 11
-; FLATSCR-NEXT: v_readlane_b32 s49, v39, 10
-; FLATSCR-NEXT: v_readlane_b32 s48, v39, 9
-; FLATSCR-NEXT: v_readlane_b32 s47, v39, 8
-; FLATSCR-NEXT: v_readlane_b32 s46, v39, 7
-; FLATSCR-NEXT: v_readlane_b32 s45, v39, 6
-; FLATSCR-NEXT: v_readlane_b32 s44, v39, 5
-; FLATSCR-NEXT: v_readlane_b32 s43, v39, 4
-; FLATSCR-NEXT: v_readlane_b32 s42, v39, 3
-; FLATSCR-NEXT: v_readlane_b32 s41, v39, 2
-; FLATSCR-NEXT: v_readlane_b32 s40, v39, 1
-; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32
+; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31
+; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30
+; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29
+; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28
+; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27
+; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26
+; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25
+; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24
+; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23
+; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22
+; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21
+; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20
+; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19
+; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18
+; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17
+; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16
+; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15
+; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14
+; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13
+; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12
+; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11
+; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10
+; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9
+; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8
+; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7
+; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6
+; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5
+; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4
+; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3
+; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2
+; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1
+; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v39, off, s33 ; 4-byte Folded Reload
@@ -2033,155 +1531,89 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
; MUBUF-LABEL: callee_need_to_spill_fp_to_reg:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v40, s39, 0
-; MUBUF-NEXT: v_writelane_b32 v40, s40, 1
-; MUBUF-NEXT: v_writelane_b32 v40, s41, 2
-; MUBUF-NEXT: v_writelane_b32 v40, s42, 3
-; MUBUF-NEXT: v_writelane_b32 v40, s43, 4
-; MUBUF-NEXT: v_writelane_b32 v40, s44, 5
-; MUBUF-NEXT: v_writelane_b32 v40, s45, 6
-; MUBUF-NEXT: v_writelane_b32 v40, s46, 7
-; MUBUF-NEXT: v_writelane_b32 v40, s47, 8
-; MUBUF-NEXT: v_writelane_b32 v40, s48, 9
-; MUBUF-NEXT: v_writelane_b32 v40, s49, 10
-; MUBUF-NEXT: v_writelane_b32 v40, s50, 11
-; MUBUF-NEXT: v_writelane_b32 v40, s51, 12
-; MUBUF-NEXT: v_writelane_b32 v40, s52, 13
-; MUBUF-NEXT: v_writelane_b32 v40, s53, 14
-; MUBUF-NEXT: v_writelane_b32 v40, s54, 15
-; MUBUF-NEXT: v_writelane_b32 v40, s55, 16
-; MUBUF-NEXT: v_writelane_b32 v40, s56, 17
-; MUBUF-NEXT: v_writelane_b32 v40, s57, 18
-; MUBUF-NEXT: v_writelane_b32 v40, s58, 19
-; MUBUF-NEXT: v_writelane_b32 v40, s59, 20
-; MUBUF-NEXT: v_writelane_b32 v40, s60, 21
-; MUBUF-NEXT: v_writelane_b32 v40, s61, 22
-; MUBUF-NEXT: v_writelane_b32 v40, s62, 23
-; MUBUF-NEXT: v_writelane_b32 v40, s63, 24
-; MUBUF-NEXT: v_writelane_b32 v40, s64, 25
-; MUBUF-NEXT: v_writelane_b32 v40, s65, 26
-; MUBUF-NEXT: v_writelane_b32 v40, s66, 27
-; MUBUF-NEXT: v_writelane_b32 v40, s67, 28
-; MUBUF-NEXT: v_writelane_b32 v40, s68, 29
-; MUBUF-NEXT: v_writelane_b32 v40, s69, 30
-; MUBUF-NEXT: v_writelane_b32 v40, s70, 31
-; MUBUF-NEXT: v_writelane_b32 v40, s71, 32
-; MUBUF-NEXT: v_writelane_b32 v40, s72, 33
-; MUBUF-NEXT: v_writelane_b32 v40, s73, 34
-; MUBUF-NEXT: v_writelane_b32 v40, s74, 35
-; MUBUF-NEXT: v_writelane_b32 v40, s75, 36
-; MUBUF-NEXT: v_writelane_b32 v40, s76, 37
-; MUBUF-NEXT: v_writelane_b32 v40, s77, 38
-; MUBUF-NEXT: v_writelane_b32 v40, s78, 39
-; MUBUF-NEXT: v_writelane_b32 v40, s79, 40
-; MUBUF-NEXT: v_writelane_b32 v40, s80, 41
-; MUBUF-NEXT: v_writelane_b32 v40, s81, 42
-; MUBUF-NEXT: v_writelane_b32 v40, s82, 43
-; MUBUF-NEXT: v_writelane_b32 v40, s83, 44
-; MUBUF-NEXT: v_writelane_b32 v40, s84, 45
-; MUBUF-NEXT: v_writelane_b32 v40, s85, 46
-; MUBUF-NEXT: v_writelane_b32 v40, s86, 47
-; MUBUF-NEXT: v_writelane_b32 v40, s87, 48
-; MUBUF-NEXT: v_writelane_b32 v40, s88, 49
-; MUBUF-NEXT: v_writelane_b32 v40, s89, 50
-; MUBUF-NEXT: v_writelane_b32 v40, s90, 51
-; MUBUF-NEXT: v_writelane_b32 v40, s91, 52
-; MUBUF-NEXT: v_writelane_b32 v40, s92, 53
-; MUBUF-NEXT: v_writelane_b32 v40, s93, 54
-; MUBUF-NEXT: v_writelane_b32 v40, s94, 55
-; MUBUF-NEXT: v_writelane_b32 v40, s95, 56
-; MUBUF-NEXT: v_writelane_b32 v40, s96, 57
-; MUBUF-NEXT: v_writelane_b32 v40, s97, 58
-; MUBUF-NEXT: v_writelane_b32 v40, s98, 59
-; MUBUF-NEXT: v_writelane_b32 v40, s99, 60
-; MUBUF-NEXT: v_writelane_b32 v40, s100, 61
-; MUBUF-NEXT: v_writelane_b32 v40, s101, 62
-; MUBUF-NEXT: v_writelane_b32 v41, s4, 0
-; MUBUF-NEXT: s_addk_i32 s32, 0x300
-; MUBUF-NEXT: v_writelane_b32 v40, s102, 63
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: v_writelane_b32 v40, s46, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s47, 1
+; MUBUF-NEXT: v_writelane_b32 v40, s48, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s49, 3
+; MUBUF-NEXT: v_writelane_b32 v40, s50, 4
+; MUBUF-NEXT: v_writelane_b32 v40, s51, 5
+; MUBUF-NEXT: v_writelane_b32 v40, s52, 6
+; MUBUF-NEXT: v_writelane_b32 v40, s53, 7
+; MUBUF-NEXT: v_writelane_b32 v40, s62, 8
+; MUBUF-NEXT: v_writelane_b32 v40, s63, 9
+; MUBUF-NEXT: v_writelane_b32 v40, s64, 10
+; MUBUF-NEXT: v_writelane_b32 v40, s65, 11
+; MUBUF-NEXT: v_writelane_b32 v40, s66, 12
+; MUBUF-NEXT: v_writelane_b32 v40, s67, 13
+; MUBUF-NEXT: v_writelane_b32 v40, s68, 14
+; MUBUF-NEXT: v_writelane_b32 v40, s69, 15
+; MUBUF-NEXT: v_writelane_b32 v40, s78, 16
+; MUBUF-NEXT: v_writelane_b32 v40, s79, 17
+; MUBUF-NEXT: v_writelane_b32 v40, s80, 18
+; MUBUF-NEXT: v_writelane_b32 v40, s81, 19
+; MUBUF-NEXT: v_writelane_b32 v40, s82, 20
+; MUBUF-NEXT: v_writelane_b32 v40, s83, 21
+; MUBUF-NEXT: v_writelane_b32 v40, s84, 22
+; MUBUF-NEXT: v_writelane_b32 v40, s85, 23
+; MUBUF-NEXT: v_writelane_b32 v40, s94, 24
+; MUBUF-NEXT: v_writelane_b32 v40, s95, 25
+; MUBUF-NEXT: v_writelane_b32 v40, s96, 26
+; MUBUF-NEXT: v_writelane_b32 v40, s97, 27
+; MUBUF-NEXT: v_writelane_b32 v40, s98, 28
+; MUBUF-NEXT: v_writelane_b32 v40, s99, 29
+; MUBUF-NEXT: v_writelane_b32 v40, s100, 30
+; MUBUF-NEXT: v_writelane_b32 v40, s101, 31
+; MUBUF-NEXT: s_addk_i32 s32, 0x200
+; MUBUF-NEXT: v_writelane_b32 v40, s102, 32
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs except CSR v40
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s102, v40, 63
-; MUBUF-NEXT: v_readlane_b32 s101, v40, 62
-; MUBUF-NEXT: v_readlane_b32 s100, v40, 61
-; MUBUF-NEXT: v_readlane_b32 s99, v40, 60
-; MUBUF-NEXT: v_readlane_b32 s98, v40, 59
-; MUBUF-NEXT: v_readlane_b32 s97, v40, 58
-; MUBUF-NEXT: v_readlane_b32 s96, v40, 57
-; MUBUF-NEXT: v_readlane_b32 s95, v40, 56
-; MUBUF-NEXT: v_readlane_b32 s94, v40, 55
-; MUBUF-NEXT: v_readlane_b32 s93, v40, 54
-; MUBUF-NEXT: v_readlane_b32 s92, v40, 53
-; MUBUF-NEXT: v_readlane_b32 s91, v40, 52
-; MUBUF-NEXT: v_readlane_b32 s90, v40, 51
-; MUBUF-NEXT: v_readlane_b32 s89, v40, 50
-; MUBUF-NEXT: v_readlane_b32 s88, v40, 49
-; MUBUF-NEXT: v_readlane_b32 s87, v40, 48
-; MUBUF-NEXT: v_readlane_b32 s86, v40, 47
-; MUBUF-NEXT: v_readlane_b32 s85, v40, 46
-; MUBUF-NEXT: v_readlane_b32 s84, v40, 45
-; MUBUF-NEXT: v_readlane_b32 s83, v40, 44
-; MUBUF-NEXT: v_readlane_b32 s82, v40, 43
-; MUBUF-NEXT: v_readlane_b32 s81, v40, 42
-; MUBUF-NEXT: v_readlane_b32 s80, v40, 41
-; MUBUF-NEXT: v_readlane_b32 s79, v40, 40
-; MUBUF-NEXT: v_readlane_b32 s78, v40, 39
-; MUBUF-NEXT: v_readlane_b32 s77, v40, 38
-; MUBUF-NEXT: v_readlane_b32 s76, v40, 37
-; MUBUF-NEXT: v_readlane_b32 s75, v40, 36
-; MUBUF-NEXT: v_readlane_b32 s74, v40, 35
-; MUBUF-NEXT: v_readlane_b32 s73, v40, 34
-; MUBUF-NEXT: v_readlane_b32 s72, v40, 33
-; MUBUF-NEXT: v_readlane_b32 s71, v40, 32
-; MUBUF-NEXT: v_readlane_b32 s70, v40, 31
-; MUBUF-NEXT: v_readlane_b32 s69, v40, 30
-; MUBUF-NEXT: v_readlane_b32 s68, v40, 29
-; MUBUF-NEXT: v_readlane_b32 s67, v40, 28
-; MUBUF-NEXT: v_readlane_b32 s66, v40, 27
-; MUBUF-NEXT: v_readlane_b32 s65, v40, 26
-; MUBUF-NEXT: v_readlane_b32 s64, v40, 25
-; MUBUF-NEXT: v_readlane_b32 s63, v40, 24
-; MUBUF-NEXT: v_readlane_b32 s62, v40, 23
-; MUBUF-NEXT: v_readlane_b32 s61, v40, 22
-; MUBUF-NEXT: v_readlane_b32 s60, v40, 21
-; MUBUF-NEXT: v_readlane_b32 s59, v40, 20
-; MUBUF-NEXT: v_readlane_b32 s58, v40, 19
-; MUBUF-NEXT: v_readlane_b32 s57, v40, 18
-; MUBUF-NEXT: v_readlane_b32 s56, v40, 17
-; MUBUF-NEXT: v_readlane_b32 s55, v40, 16
-; MUBUF-NEXT: v_readlane_b32 s54, v40, 15
-; MUBUF-NEXT: v_readlane_b32 s53, v40, 14
-; MUBUF-NEXT: v_readlane_b32 s52, v40, 13
-; MUBUF-NEXT: v_readlane_b32 s51, v40, 12
-; MUBUF-NEXT: v_readlane_b32 s50, v40, 11
-; MUBUF-NEXT: v_readlane_b32 s49, v40, 10
-; MUBUF-NEXT: v_readlane_b32 s48, v40, 9
-; MUBUF-NEXT: v_readlane_b32 s47, v40, 8
-; MUBUF-NEXT: v_readlane_b32 s46, v40, 7
-; MUBUF-NEXT: v_readlane_b32 s45, v40, 6
-; MUBUF-NEXT: v_readlane_b32 s44, v40, 5
-; MUBUF-NEXT: v_readlane_b32 s43, v40, 4
-; MUBUF-NEXT: v_readlane_b32 s42, v40, 3
-; MUBUF-NEXT: v_readlane_b32 s41, v40, 2
-; MUBUF-NEXT: v_readlane_b32 s40, v40, 1
-; MUBUF-NEXT: v_readlane_b32 s39, v40, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v40, 32
+; MUBUF-NEXT: v_readlane_b32 s101, v40, 31
+; MUBUF-NEXT: v_readlane_b32 s100, v40, 30
+; MUBUF-NEXT: v_readlane_b32 s99, v40, 29
+; MUBUF-NEXT: v_readlane_b32 s98, v40, 28
+; MUBUF-NEXT: v_readlane_b32 s97, v40, 27
+; MUBUF-NEXT: v_readlane_b32 s96, v40, 26
+; MUBUF-NEXT: v_readlane_b32 s95, v40, 25
+; MUBUF-NEXT: v_readlane_b32 s94, v40, 24
+; MUBUF-NEXT: v_readlane_b32 s85, v40, 23
+; MUBUF-NEXT: v_readlane_b32 s84, v40, 22
+; MUBUF-NEXT: v_readlane_b32 s83, v40, 21
+; MUBUF-NEXT: v_readlane_b32 s82, v40, 20
+; MUBUF-NEXT: v_readlane_b32 s81, v40, 19
+; MUBUF-NEXT: v_readlane_b32 s80, v40, 18
+; MUBUF-NEXT: v_readlane_b32 s79, v40, 17
+; MUBUF-NEXT: v_readlane_b32 s78, v40, 16
+; MUBUF-NEXT: v_readlane_b32 s69, v40, 15
+; MUBUF-NEXT: v_readlane_b32 s68, v40, 14
+; MUBUF-NEXT: v_readlane_b32 s67, v40, 13
+; MUBUF-NEXT: v_readlane_b32 s66, v40, 12
+; MUBUF-NEXT: v_readlane_b32 s65, v40, 11
+; MUBUF-NEXT: v_readlane_b32 s64, v40, 10
+; MUBUF-NEXT: v_readlane_b32 s63, v40, 9
+; MUBUF-NEXT: v_readlane_b32 s62, v40, 8
+; MUBUF-NEXT: v_readlane_b32 s53, v40, 7
+; MUBUF-NEXT: v_readlane_b32 s52, v40, 6
+; MUBUF-NEXT: v_readlane_b32 s51, v40, 5
+; MUBUF-NEXT: v_readlane_b32 s50, v40, 4
+; MUBUF-NEXT: v_readlane_b32 s49, v40, 3
+; MUBUF-NEXT: v_readlane_b32 s48, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s47, v40, 1
+; MUBUF-NEXT: v_readlane_b32 s46, v40, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: v_readlane_b32 s4, v41, 0
-; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@@ -2193,141 +1625,79 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v40, s39, 0
-; FLATSCR-NEXT: v_writelane_b32 v40, s40, 1
-; FLATSCR-NEXT: v_writelane_b32 v40, s41, 2
-; FLATSCR-NEXT: v_writelane_b32 v40, s42, 3
-; FLATSCR-NEXT: v_writelane_b32 v40, s43, 4
-; FLATSCR-NEXT: v_writelane_b32 v40, s44, 5
-; FLATSCR-NEXT: v_writelane_b32 v40, s45, 6
-; FLATSCR-NEXT: v_writelane_b32 v40, s46, 7
-; FLATSCR-NEXT: v_writelane_b32 v40, s47, 8
-; FLATSCR-NEXT: v_writelane_b32 v40, s48, 9
-; FLATSCR-NEXT: v_writelane_b32 v40, s49, 10
-; FLATSCR-NEXT: v_writelane_b32 v40, s50, 11
-; FLATSCR-NEXT: v_writelane_b32 v40, s51, 12
-; FLATSCR-NEXT: v_writelane_b32 v40, s52, 13
-; FLATSCR-NEXT: v_writelane_b32 v40, s53, 14
-; FLATSCR-NEXT: v_writelane_b32 v40, s54, 15
-; FLATSCR-NEXT: v_writelane_b32 v40, s55, 16
-; FLATSCR-NEXT: v_writelane_b32 v40, s56, 17
-; FLATSCR-NEXT: v_writelane_b32 v40, s57, 18
-; FLATSCR-NEXT: v_writelane_b32 v40, s58, 19
-; FLATSCR-NEXT: v_writelane_b32 v40, s59, 20
-; FLATSCR-NEXT: v_writelane_b32 v40, s60, 21
-; FLATSCR-NEXT: v_writelane_b32 v40, s61, 22
-; FLATSCR-NEXT: v_writelane_b32 v40, s62, 23
-; FLATSCR-NEXT: v_writelane_b32 v40, s63, 24
-; FLATSCR-NEXT: v_writelane_b32 v40, s64, 25
-; FLATSCR-NEXT: v_writelane_b32 v40, s65, 26
-; FLATSCR-NEXT: v_writelane_b32 v40, s66, 27
-; FLATSCR-NEXT: v_writelane_b32 v40, s67, 28
-; FLATSCR-NEXT: v_writelane_b32 v40, s68, 29
-; FLATSCR-NEXT: v_writelane_b32 v40, s69, 30
-; FLATSCR-NEXT: v_writelane_b32 v40, s70, 31
-; FLATSCR-NEXT: v_writelane_b32 v40, s71, 32
-; FLATSCR-NEXT: v_writelane_b32 v40, s72, 33
-; FLATSCR-NEXT: v_writelane_b32 v40, s73, 34
-; FLATSCR-NEXT: v_writelane_b32 v40, s74, 35
-; FLATSCR-NEXT: v_writelane_b32 v40, s75, 36
-; FLATSCR-NEXT: v_writelane_b32 v40, s76, 37
-; FLATSCR-NEXT: v_writelane_b32 v40, s77, 38
-; FLATSCR-NEXT: v_writelane_b32 v40, s78, 39
-; FLATSCR-NEXT: v_writelane_b32 v40, s79, 40
-; FLATSCR-NEXT: v_writelane_b32 v40, s80, 41
-; FLATSCR-NEXT: v_writelane_b32 v40, s81, 42
-; FLATSCR-NEXT: v_writelane_b32 v40, s82, 43
-; FLATSCR-NEXT: v_writelane_b32 v40, s83, 44
-; FLATSCR-NEXT: v_writelane_b32 v40, s84, 45
-; FLATSCR-NEXT: v_writelane_b32 v40, s85, 46
-; FLATSCR-NEXT: v_writelane_b32 v40, s86, 47
-; FLATSCR-NEXT: v_writelane_b32 v40, s87, 48
-; FLATSCR-NEXT: v_writelane_b32 v40, s88, 49
-; FLATSCR-NEXT: v_writelane_b32 v40, s89, 50
-; FLATSCR-NEXT: v_writelane_b32 v40, s90, 51
-; FLATSCR-NEXT: v_writelane_b32 v40, s91, 52
-; FLATSCR-NEXT: v_writelane_b32 v40, s92, 53
-; FLATSCR-NEXT: v_writelane_b32 v40, s93, 54
-; FLATSCR-NEXT: v_writelane_b32 v40, s94, 55
-; FLATSCR-NEXT: v_writelane_b32 v40, s95, 56
-; FLATSCR-NEXT: v_writelane_b32 v40, s96, 57
-; FLATSCR-NEXT: v_writelane_b32 v40, s97, 58
-; FLATSCR-NEXT: v_writelane_b32 v40, s98, 59
-; FLATSCR-NEXT: v_writelane_b32 v40, s99, 60
-; FLATSCR-NEXT: v_writelane_b32 v40, s100, 61
-; FLATSCR-NEXT: v_writelane_b32 v40, s101, 62
+; FLATSCR-NEXT: v_writelane_b32 v40, s46, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s47, 1
+; FLATSCR-NEXT: v_writelane_b32 v40, s48, 2
+; FLATSCR-NEXT: v_writelane_b32 v40, s49, 3
+; FLATSCR-NEXT: v_writelane_b32 v40, s50, 4
+; FLATSCR-NEXT: v_writelane_b32 v40, s51, 5
+; FLATSCR-NEXT: v_writelane_b32 v40, s52, 6
+; FLATSCR-NEXT: v_writelane_b32 v40, s53, 7
+; FLATSCR-NEXT: v_writelane_b32 v40, s62, 8
+; FLATSCR-NEXT: v_writelane_b32 v40, s63, 9
+; FLATSCR-NEXT: v_writelane_b32 v40, s64, 10
+; FLATSCR-NEXT: v_writelane_b32 v40, s65, 11
+; FLATSCR-NEXT: v_writelane_b32 v40, s66, 12
+; FLATSCR-NEXT: v_writelane_b32 v40, s67, 13
+; FLATSCR-NEXT: v_writelane_b32 v40, s68, 14
+; FLATSCR-NEXT: v_writelane_b32 v40, s69, 15
+; FLATSCR-NEXT: v_writelane_b32 v40, s78, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s79, 17
+; FLATSCR-NEXT: v_writelane_b32 v40, s80, 18
+; FLATSCR-NEXT: v_writelane_b32 v40, s81, 19
+; FLATSCR-NEXT: v_writelane_b32 v40, s82, 20
+; FLATSCR-NEXT: v_writelane_b32 v40, s83, 21
+; FLATSCR-NEXT: v_writelane_b32 v40, s84, 22
+; FLATSCR-NEXT: v_writelane_b32 v40, s85, 23
+; FLATSCR-NEXT: v_writelane_b32 v40, s94, 24
+; FLATSCR-NEXT: v_writelane_b32 v40, s95, 25
+; FLATSCR-NEXT: v_writelane_b32 v40, s96, 26
+; FLATSCR-NEXT: v_writelane_b32 v40, s97, 27
+; FLATSCR-NEXT: v_writelane_b32 v40, s98, 28
+; FLATSCR-NEXT: v_writelane_b32 v40, s99, 29
+; FLATSCR-NEXT: v_writelane_b32 v40, s100, 30
+; FLATSCR-NEXT: v_writelane_b32 v40, s101, 31
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
-; FLATSCR-NEXT: v_writelane_b32 v40, s102, 63
+; FLATSCR-NEXT: v_writelane_b32 v40, s102, 32
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s102, v40, 63
-; FLATSCR-NEXT: v_readlane_b32 s101, v40, 62
-; FLATSCR-NEXT: v_readlane_b32 s100, v40, 61
-; FLATSCR-NEXT: v_readlane_b32 s99, v40, 60
-; FLATSCR-NEXT: v_readlane_b32 s98, v40, 59
-; FLATSCR-NEXT: v_readlane_b32 s97, v40, 58
-; FLATSCR-NEXT: v_readlane_b32 s96, v40, 57
-; FLATSCR-NEXT: v_readlane_b32 s95, v40, 56
-; FLATSCR-NEXT: v_readlane_b32 s94, v40, 55
-; FLATSCR-NEXT: v_readlane_b32 s93, v40, 54
-; FLATSCR-NEXT: v_readlane_b32 s92, v40, 53
-; FLATSCR-NEXT: v_readlane_b32 s91, v40, 52
-; FLATSCR-NEXT: v_readlane_b32 s90, v40, 51
-; FLATSCR-NEXT: v_readlane_b32 s89, v40, 50
-; FLATSCR-NEXT: v_readlane_b32 s88, v40, 49
-; FLATSCR-NEXT: v_readlane_b32 s87, v40, 48
-; FLATSCR-NEXT: v_readlane_b32 s86, v40, 47
-; FLATSCR-NEXT: v_readlane_b32 s85, v40, 46
-; FLATSCR-NEXT: v_readlane_b32 s84, v40, 45
-; FLATSCR-NEXT: v_readlane_b32 s83, v40, 44
-; FLATSCR-NEXT: v_readlane_b32 s82, v40, 43
-; FLATSCR-NEXT: v_readlane_b32 s81, v40, 42
-; FLATSCR-NEXT: v_readlane_b32 s80, v40, 41
-; FLATSCR-NEXT: v_readlane_b32 s79, v40, 40
-; FLATSCR-NEXT: v_readlane_b32 s78, v40, 39
-; FLATSCR-NEXT: v_readlane_b32 s77, v40, 38
-; FLATSCR-NEXT: v_readlane_b32 s76, v40, 37
-; FLATSCR-NEXT: v_readlane_b32 s75, v40, 36
-; FLATSCR-NEXT: v_readlane_b32 s74, v40, 35
-; FLATSCR-NEXT: v_readlane_b32 s73, v40, 34
-; FLATSCR-NEXT: v_readlane_b32 s72, v40, 33
-; FLATSCR-NEXT: v_readlane_b32 s71, v40, 32
-; FLATSCR-NEXT: v_readlane_b32 s70, v40, 31
-; FLATSCR-NEXT: v_readlane_b32 s69, v40, 30
-; FLATSCR-NEXT: v_readlane_b32 s68, v40, 29
-; FLATSCR-NEXT: v_readlane_b32 s67, v40, 28
-; FLATSCR-NEXT: v_readlane_b32 s66, v40, 27
-; FLATSCR-NEXT: v_readlane_b32 s65, v40, 26
-; FLATSCR-NEXT: v_readlane_b32 s64, v40, 25
-; FLATSCR-NEXT: v_readlane_b32 s63, v40, 24
-; FLATSCR-NEXT: v_readlane_b32 s62, v40, 23
-; FLATSCR-NEXT: v_readlane_b32 s61, v40, 22
-; FLATSCR-NEXT: v_readlane_b32 s60, v40, 21
-; FLATSCR-NEXT: v_readlane_b32 s59, v40, 20
-; FLATSCR-NEXT: v_readlane_b32 s58, v40, 19
-; FLATSCR-NEXT: v_readlane_b32 s57, v40, 18
-; FLATSCR-NEXT: v_readlane_b32 s56, v40, 17
-; FLATSCR-NEXT: v_readlane_b32 s55, v40, 16
-; FLATSCR-NEXT: v_readlane_b32 s54, v40, 15
-; FLATSCR-NEXT: v_readlane_b32 s53, v40, 14
-; FLATSCR-NEXT: v_readlane_b32 s52, v40, 13
-; FLATSCR-NEXT: v_readlane_b32 s51, v40, 12
-; FLATSCR-NEXT: v_readlane_b32 s50, v40, 11
-; FLATSCR-NEXT: v_readlane_b32 s49, v40, 10
-; FLATSCR-NEXT: v_readlane_b32 s48, v40, 9
-; FLATSCR-NEXT: v_readlane_b32 s47, v40, 8
-; FLATSCR-NEXT: v_readlane_b32 s46, v40, 7
-; FLATSCR-NEXT: v_readlane_b32 s45, v40, 6
-; FLATSCR-NEXT: v_readlane_b32 s44, v40, 5
-; FLATSCR-NEXT: v_readlane_b32 s43, v40, 4
-; FLATSCR-NEXT: v_readlane_b32 s42, v40, 3
-; FLATSCR-NEXT: v_readlane_b32 s41, v40, 2
-; FLATSCR-NEXT: v_readlane_b32 s40, v40, 1
-; FLATSCR-NEXT: v_readlane_b32 s39, v40, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v40, 32
+; FLATSCR-NEXT: v_readlane_b32 s101, v40, 31
+; FLATSCR-NEXT: v_readlane_b32 s100, v40, 30
+; FLATSCR-NEXT: v_readlane_b32 s99, v40, 29
+; FLATSCR-NEXT: v_readlane_b32 s98, v40, 28
+; FLATSCR-NEXT: v_readlane_b32 s97, v40, 27
+; FLATSCR-NEXT: v_readlane_b32 s96, v40, 26
+; FLATSCR-NEXT: v_readlane_b32 s95, v40, 25
+; FLATSCR-NEXT: v_readlane_b32 s94, v40, 24
+; FLATSCR-NEXT: v_readlane_b32 s85, v40, 23
+; FLATSCR-NEXT: v_readlane_b32 s84, v40, 22
+; FLATSCR-NEXT: v_readlane_b32 s83, v40, 21
+; FLATSCR-NEXT: v_readlane_b32 s82, v40, 20
+; FLATSCR-NEXT: v_readlane_b32 s81, v40, 19
+; FLATSCR-NEXT: v_readlane_b32 s80, v40, 18
+; FLATSCR-NEXT: v_readlane_b32 s79, v40, 17
+; FLATSCR-NEXT: v_readlane_b32 s78, v40, 16
+; FLATSCR-NEXT: v_readlane_b32 s69, v40, 15
+; FLATSCR-NEXT: v_readlane_b32 s68, v40, 14
+; FLATSCR-NEXT: v_readlane_b32 s67, v40, 13
+; FLATSCR-NEXT: v_readlane_b32 s66, v40, 12
+; FLATSCR-NEXT: v_readlane_b32 s65, v40, 11
+; FLATSCR-NEXT: v_readlane_b32 s64, v40, 10
+; FLATSCR-NEXT: v_readlane_b32 s63, v40, 9
+; FLATSCR-NEXT: v_readlane_b32 s62, v40, 8
+; FLATSCR-NEXT: v_readlane_b32 s53, v40, 7
+; FLATSCR-NEXT: v_readlane_b32 s52, v40, 6
+; FLATSCR-NEXT: v_readlane_b32 s51, v40, 5
+; FLATSCR-NEXT: v_readlane_b32 s50, v40, 4
+; FLATSCR-NEXT: v_readlane_b32 s49, v40, 3
+; FLATSCR-NEXT: v_readlane_b32 s48, v40, 2
+; FLATSCR-NEXT: v_readlane_b32 s47, v40, 1
+; FLATSCR-NEXT: v_readlane_b32 s46, v40, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
@@ -2361,81 +1731,48 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; MUBUF-LABEL: spill_fp_to_memory_scratch_reg_needed_mubuf_offset:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s4, s33
+; MUBUF-NEXT: s_mov_b32 s38, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100
-; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v39, s39, 0
-; MUBUF-NEXT: v_writelane_b32 v39, s40, 1
-; MUBUF-NEXT: v_writelane_b32 v39, s41, 2
-; MUBUF-NEXT: v_writelane_b32 v39, s42, 3
-; MUBUF-NEXT: v_writelane_b32 v39, s43, 4
-; MUBUF-NEXT: v_writelane_b32 v39, s44, 5
-; MUBUF-NEXT: v_writelane_b32 v39, s45, 6
-; MUBUF-NEXT: v_writelane_b32 v39, s46, 7
-; MUBUF-NEXT: v_writelane_b32 v39, s47, 8
-; MUBUF-NEXT: v_writelane_b32 v39, s48, 9
-; MUBUF-NEXT: v_writelane_b32 v39, s49, 10
-; MUBUF-NEXT: v_writelane_b32 v39, s50, 11
-; MUBUF-NEXT: v_writelane_b32 v39, s51, 12
-; MUBUF-NEXT: v_writelane_b32 v39, s52, 13
-; MUBUF-NEXT: v_writelane_b32 v39, s53, 14
-; MUBUF-NEXT: v_writelane_b32 v39, s54, 15
-; MUBUF-NEXT: v_writelane_b32 v39, s55, 16
-; MUBUF-NEXT: v_writelane_b32 v39, s56, 17
-; MUBUF-NEXT: v_writelane_b32 v39, s57, 18
-; MUBUF-NEXT: v_writelane_b32 v39, s58, 19
-; MUBUF-NEXT: v_writelane_b32 v39, s59, 20
-; MUBUF-NEXT: v_writelane_b32 v39, s60, 21
-; MUBUF-NEXT: v_writelane_b32 v39, s61, 22
-; MUBUF-NEXT: v_writelane_b32 v39, s62, 23
-; MUBUF-NEXT: v_writelane_b32 v39, s63, 24
-; MUBUF-NEXT: v_writelane_b32 v39, s64, 25
-; MUBUF-NEXT: v_writelane_b32 v39, s65, 26
-; MUBUF-NEXT: v_writelane_b32 v39, s66, 27
-; MUBUF-NEXT: v_writelane_b32 v39, s67, 28
-; MUBUF-NEXT: v_writelane_b32 v39, s68, 29
-; MUBUF-NEXT: v_writelane_b32 v39, s69, 30
-; MUBUF-NEXT: v_writelane_b32 v39, s70, 31
-; MUBUF-NEXT: v_writelane_b32 v39, s71, 32
-; MUBUF-NEXT: v_writelane_b32 v39, s72, 33
-; MUBUF-NEXT: v_writelane_b32 v39, s73, 34
-; MUBUF-NEXT: v_writelane_b32 v39, s74, 35
-; MUBUF-NEXT: v_writelane_b32 v39, s75, 36
-; MUBUF-NEXT: v_writelane_b32 v39, s76, 37
-; MUBUF-NEXT: v_writelane_b32 v39, s77, 38
-; MUBUF-NEXT: v_writelane_b32 v39, s78, 39
-; MUBUF-NEXT: v_writelane_b32 v39, s79, 40
-; MUBUF-NEXT: v_writelane_b32 v39, s80, 41
-; MUBUF-NEXT: v_writelane_b32 v39, s81, 42
-; MUBUF-NEXT: v_writelane_b32 v39, s82, 43
-; MUBUF-NEXT: v_writelane_b32 v39, s83, 44
-; MUBUF-NEXT: v_writelane_b32 v39, s84, 45
-; MUBUF-NEXT: v_writelane_b32 v39, s85, 46
-; MUBUF-NEXT: v_writelane_b32 v39, s86, 47
-; MUBUF-NEXT: v_writelane_b32 v39, s87, 48
-; MUBUF-NEXT: v_writelane_b32 v39, s88, 49
-; MUBUF-NEXT: v_writelane_b32 v39, s89, 50
-; MUBUF-NEXT: v_writelane_b32 v39, s90, 51
-; MUBUF-NEXT: v_writelane_b32 v39, s91, 52
-; MUBUF-NEXT: v_writelane_b32 v39, s92, 53
-; MUBUF-NEXT: v_writelane_b32 v39, s93, 54
-; MUBUF-NEXT: v_writelane_b32 v39, s94, 55
-; MUBUF-NEXT: v_writelane_b32 v39, s95, 56
-; MUBUF-NEXT: v_writelane_b32 v39, s96, 57
-; MUBUF-NEXT: v_writelane_b32 v39, s97, 58
-; MUBUF-NEXT: v_writelane_b32 v39, s98, 59
-; MUBUF-NEXT: v_writelane_b32 v39, s99, 60
-; MUBUF-NEXT: v_mov_b32_e32 v0, s4
-; MUBUF-NEXT: s_add_i32 s5, s33, 0x40200
-; MUBUF-NEXT: v_writelane_b32 v39, s100, 61
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v39, s101, 62
+; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
+; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: v_writelane_b32 v39, s46, 0
+; MUBUF-NEXT: v_writelane_b32 v39, s47, 1
+; MUBUF-NEXT: v_writelane_b32 v39, s48, 2
+; MUBUF-NEXT: v_writelane_b32 v39, s49, 3
+; MUBUF-NEXT: v_writelane_b32 v39, s50, 4
+; MUBUF-NEXT: v_writelane_b32 v39, s51, 5
+; MUBUF-NEXT: v_writelane_b32 v39, s52, 6
+; MUBUF-NEXT: v_writelane_b32 v39, s53, 7
+; MUBUF-NEXT: v_writelane_b32 v39, s62, 8
+; MUBUF-NEXT: v_writelane_b32 v39, s63, 9
+; MUBUF-NEXT: v_writelane_b32 v39, s64, 10
+; MUBUF-NEXT: v_writelane_b32 v39, s65, 11
+; MUBUF-NEXT: v_writelane_b32 v39, s66, 12
+; MUBUF-NEXT: v_writelane_b32 v39, s67, 13
+; MUBUF-NEXT: v_writelane_b32 v39, s68, 14
+; MUBUF-NEXT: v_writelane_b32 v39, s69, 15
+; MUBUF-NEXT: v_writelane_b32 v39, s78, 16
+; MUBUF-NEXT: v_writelane_b32 v39, s79, 17
+; MUBUF-NEXT: v_writelane_b32 v39, s80, 18
+; MUBUF-NEXT: v_writelane_b32 v39, s81, 19
+; MUBUF-NEXT: v_writelane_b32 v39, s82, 20
+; MUBUF-NEXT: v_writelane_b32 v39, s83, 21
+; MUBUF-NEXT: v_writelane_b32 v39, s84, 22
+; MUBUF-NEXT: v_writelane_b32 v39, s85, 23
+; MUBUF-NEXT: v_writelane_b32 v39, s94, 24
+; MUBUF-NEXT: v_writelane_b32 v39, s95, 25
+; MUBUF-NEXT: v_writelane_b32 v39, s96, 26
+; MUBUF-NEXT: v_writelane_b32 v39, s97, 27
+; MUBUF-NEXT: v_writelane_b32 v39, s98, 28
+; MUBUF-NEXT: v_writelane_b32 v39, s99, 29
+; MUBUF-NEXT: v_writelane_b32 v39, s100, 30
+; MUBUF-NEXT: v_writelane_b32 v39, s101, 31
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000
-; MUBUF-NEXT: v_writelane_b32 v39, s102, 63
+; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300
+; MUBUF-NEXT: v_writelane_b32 v39, s102, 32
; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
@@ -2444,81 +1781,45 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs except CSR v40
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: s_add_i32 s5, s33, 0x40200
-; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_add_i32 s32, s32, 0x40400
-; MUBUF-NEXT: v_readlane_b32 s102, v39, 63
-; MUBUF-NEXT: v_readlane_b32 s101, v39, 62
-; MUBUF-NEXT: v_readlane_b32 s100, v39, 61
-; MUBUF-NEXT: v_readlane_b32 s99, v39, 60
-; MUBUF-NEXT: v_readlane_b32 s98, v39, 59
-; MUBUF-NEXT: v_readlane_b32 s97, v39, 58
-; MUBUF-NEXT: v_readlane_b32 s96, v39, 57
-; MUBUF-NEXT: v_readlane_b32 s95, v39, 56
-; MUBUF-NEXT: v_readlane_b32 s94, v39, 55
-; MUBUF-NEXT: v_readlane_b32 s93, v39, 54
-; MUBUF-NEXT: v_readlane_b32 s92, v39, 53
-; MUBUF-NEXT: v_readlane_b32 s91, v39, 52
-; MUBUF-NEXT: v_readlane_b32 s90, v39, 51
-; MUBUF-NEXT: v_readlane_b32 s89, v39, 50
-; MUBUF-NEXT: v_readlane_b32 s88, v39, 49
-; MUBUF-NEXT: v_readlane_b32 s87, v39, 48
-; MUBUF-NEXT: v_readlane_b32 s86, v39, 47
-; MUBUF-NEXT: v_readlane_b32 s85, v39, 46
-; MUBUF-NEXT: v_readlane_b32 s84, v39, 45
-; MUBUF-NEXT: v_readlane_b32 s83, v39, 44
-; MUBUF-NEXT: v_readlane_b32 s82, v39, 43
-; MUBUF-NEXT: v_readlane_b32 s81, v39, 42
-; MUBUF-NEXT: v_readlane_b32 s80, v39, 41
-; MUBUF-NEXT: v_readlane_b32 s79, v39, 40
-; MUBUF-NEXT: v_readlane_b32 s78, v39, 39
-; MUBUF-NEXT: v_readlane_b32 s77, v39, 38
-; MUBUF-NEXT: v_readlane_b32 s76, v39, 37
-; MUBUF-NEXT: v_readlane_b32 s75, v39, 36
-; MUBUF-NEXT: v_readlane_b32 s74, v39, 35
-; MUBUF-NEXT: v_readlane_b32 s73, v39, 34
-; MUBUF-NEXT: v_readlane_b32 s72, v39, 33
-; MUBUF-NEXT: v_readlane_b32 s71, v39, 32
-; MUBUF-NEXT: v_readlane_b32 s70, v39, 31
-; MUBUF-NEXT: v_readlane_b32 s69, v39, 30
-; MUBUF-NEXT: v_readlane_b32 s68, v39, 29
-; MUBUF-NEXT: v_readlane_b32 s67, v39, 28
-; MUBUF-NEXT: v_readlane_b32 s66, v39, 27
-; MUBUF-NEXT: v_readlane_b32 s65, v39, 26
-; MUBUF-NEXT: v_readlane_b32 s64, v39, 25
-; MUBUF-NEXT: v_readlane_b32 s63, v39, 24
-; MUBUF-NEXT: v_readlane_b32 s62, v39, 23
-; MUBUF-NEXT: v_readlane_b32 s61, v39, 22
-; MUBUF-NEXT: v_readlane_b32 s60, v39, 21
-; MUBUF-NEXT: v_readlane_b32 s59, v39, 20
-; MUBUF-NEXT: v_readlane_b32 s58, v39, 19
-; MUBUF-NEXT: v_readlane_b32 s57, v39, 18
-; MUBUF-NEXT: v_readlane_b32 s56, v39, 17
-; MUBUF-NEXT: v_readlane_b32 s55, v39, 16
-; MUBUF-NEXT: v_readlane_b32 s54, v39, 15
-; MUBUF-NEXT: v_readlane_b32 s53, v39, 14
-; MUBUF-NEXT: v_readlane_b32 s52, v39, 13
-; MUBUF-NEXT: v_readlane_b32 s51, v39, 12
-; MUBUF-NEXT: v_readlane_b32 s50, v39, 11
-; MUBUF-NEXT: v_readlane_b32 s49, v39, 10
-; MUBUF-NEXT: v_readlane_b32 s48, v39, 9
-; MUBUF-NEXT: v_readlane_b32 s47, v39, 8
-; MUBUF-NEXT: v_readlane_b32 s46, v39, 7
-; MUBUF-NEXT: v_readlane_b32 s45, v39, 6
-; MUBUF-NEXT: v_readlane_b32 s44, v39, 5
-; MUBUF-NEXT: v_readlane_b32 s43, v39, 4
-; MUBUF-NEXT: v_readlane_b32 s42, v39, 3
-; MUBUF-NEXT: v_readlane_b32 s41, v39, 2
-; MUBUF-NEXT: v_readlane_b32 s40, v39, 1
-; MUBUF-NEXT: v_readlane_b32 s39, v39, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v39, 32
+; MUBUF-NEXT: v_readlane_b32 s101, v39, 31
+; MUBUF-NEXT: v_readlane_b32 s100, v39, 30
+; MUBUF-NEXT: v_readlane_b32 s99, v39, 29
+; MUBUF-NEXT: v_readlane_b32 s98, v39, 28
+; MUBUF-NEXT: v_readlane_b32 s97, v39, 27
+; MUBUF-NEXT: v_readlane_b32 s96, v39, 26
+; MUBUF-NEXT: v_readlane_b32 s95, v39, 25
+; MUBUF-NEXT: v_readlane_b32 s94, v39, 24
+; MUBUF-NEXT: v_readlane_b32 s85, v39, 23
+; MUBUF-NEXT: v_readlane_b32 s84, v39, 22
+; MUBUF-NEXT: v_readlane_b32 s83, v39, 21
+; MUBUF-NEXT: v_readlane_b32 s82, v39, 20
+; MUBUF-NEXT: v_readlane_b32 s81, v39, 19
+; MUBUF-NEXT: v_readlane_b32 s80, v39, 18
+; MUBUF-NEXT: v_readlane_b32 s79, v39, 17
+; MUBUF-NEXT: v_readlane_b32 s78, v39, 16
+; MUBUF-NEXT: v_readlane_b32 s69, v39, 15
+; MUBUF-NEXT: v_readlane_b32 s68, v39, 14
+; MUBUF-NEXT: v_readlane_b32 s67, v39, 13
+; MUBUF-NEXT: v_readlane_b32 s66, v39, 12
+; MUBUF-NEXT: v_readlane_b32 s65, v39, 11
+; MUBUF-NEXT: v_readlane_b32 s64, v39, 10
+; MUBUF-NEXT: v_readlane_b32 s63, v39, 9
+; MUBUF-NEXT: v_readlane_b32 s62, v39, 8
+; MUBUF-NEXT: v_readlane_b32 s53, v39, 7
+; MUBUF-NEXT: v_readlane_b32 s52, v39, 6
+; MUBUF-NEXT: v_readlane_b32 s51, v39, 5
+; MUBUF-NEXT: v_readlane_b32 s50, v39, 4
+; MUBUF-NEXT: v_readlane_b32 s49, v39, 3
+; MUBUF-NEXT: v_readlane_b32 s48, v39, 2
+; MUBUF-NEXT: v_readlane_b32 s47, v39, 1
+; MUBUF-NEXT: v_readlane_b32 s46, v39, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: v_readfirstlane_b32 s4, v0
-; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100
-; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s5 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: s_mov_b32 s33, s4
+; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
+; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s6 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
+; MUBUF-NEXT: s_mov_b32 s33, s38
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@@ -2531,73 +1832,42 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004
; FLATSCR-NEXT: scratch_store_dword off, v39, s1 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0
-; FLATSCR-NEXT: v_writelane_b32 v39, s40, 1
-; FLATSCR-NEXT: v_writelane_b32 v39, s41, 2
-; FLATSCR-NEXT: v_writelane_b32 v39, s42, 3
-; FLATSCR-NEXT: v_writelane_b32 v39, s43, 4
-; FLATSCR-NEXT: v_writelane_b32 v39, s44, 5
-; FLATSCR-NEXT: v_writelane_b32 v39, s45, 6
-; FLATSCR-NEXT: v_writelane_b32 v39, s46, 7
-; FLATSCR-NEXT: v_writelane_b32 v39, s47, 8
-; FLATSCR-NEXT: v_writelane_b32 v39, s48, 9
-; FLATSCR-NEXT: v_writelane_b32 v39, s49, 10
-; FLATSCR-NEXT: v_writelane_b32 v39, s50, 11
-; FLATSCR-NEXT: v_writelane_b32 v39, s51, 12
-; FLATSCR-NEXT: v_writelane_b32 v39, s52, 13
-; FLATSCR-NEXT: v_writelane_b32 v39, s53, 14
-; FLATSCR-NEXT: v_writelane_b32 v39, s54, 15
-; FLATSCR-NEXT: v_writelane_b32 v39, s55, 16
-; FLATSCR-NEXT: v_writelane_b32 v39, s56, 17
-; FLATSCR-NEXT: v_writelane_b32 v39, s57, 18
-; FLATSCR-NEXT: v_writelane_b32 v39, s58, 19
-; FLATSCR-NEXT: v_writelane_b32 v39, s59, 20
-; FLATSCR-NEXT: v_writelane_b32 v39, s60, 21
-; FLATSCR-NEXT: v_writelane_b32 v39, s61, 22
-; FLATSCR-NEXT: v_writelane_b32 v39, s62, 23
-; FLATSCR-NEXT: v_writelane_b32 v39, s63, 24
-; FLATSCR-NEXT: v_writelane_b32 v39, s64, 25
-; FLATSCR-NEXT: v_writelane_b32 v39, s65, 26
-; FLATSCR-NEXT: v_writelane_b32 v39, s66, 27
-; FLATSCR-NEXT: v_writelane_b32 v39, s67, 28
-; FLATSCR-NEXT: v_writelane_b32 v39, s68, 29
-; FLATSCR-NEXT: v_writelane_b32 v39, s69, 30
-; FLATSCR-NEXT: v_writelane_b32 v39, s70, 31
-; FLATSCR-NEXT: v_writelane_b32 v39, s71, 32
-; FLATSCR-NEXT: v_writelane_b32 v39, s72, 33
-; FLATSCR-NEXT: v_writelane_b32 v39, s73, 34
-; FLATSCR-NEXT: v_writelane_b32 v39, s74, 35
-; FLATSCR-NEXT: v_writelane_b32 v39, s75, 36
-; FLATSCR-NEXT: v_writelane_b32 v39, s76, 37
-; FLATSCR-NEXT: v_writelane_b32 v39, s77, 38
-; FLATSCR-NEXT: v_writelane_b32 v39, s78, 39
-; FLATSCR-NEXT: v_writelane_b32 v39, s79, 40
-; FLATSCR-NEXT: v_writelane_b32 v39, s80, 41
-; FLATSCR-NEXT: v_writelane_b32 v39, s81, 42
-; FLATSCR-NEXT: v_writelane_b32 v39, s82, 43
-; FLATSCR-NEXT: v_writelane_b32 v39, s83, 44
-; FLATSCR-NEXT: v_writelane_b32 v39, s84, 45
-; FLATSCR-NEXT: v_writelane_b32 v39, s85, 46
-; FLATSCR-NEXT: v_writelane_b32 v39, s86, 47
-; FLATSCR-NEXT: v_writelane_b32 v39, s87, 48
-; FLATSCR-NEXT: v_writelane_b32 v39, s88, 49
-; FLATSCR-NEXT: v_writelane_b32 v39, s89, 50
-; FLATSCR-NEXT: v_writelane_b32 v39, s90, 51
-; FLATSCR-NEXT: v_writelane_b32 v39, s91, 52
-; FLATSCR-NEXT: v_writelane_b32 v39, s92, 53
-; FLATSCR-NEXT: v_writelane_b32 v39, s93, 54
-; FLATSCR-NEXT: v_writelane_b32 v39, s94, 55
-; FLATSCR-NEXT: v_writelane_b32 v39, s95, 56
-; FLATSCR-NEXT: v_writelane_b32 v39, s96, 57
-; FLATSCR-NEXT: v_writelane_b32 v39, s97, 58
-; FLATSCR-NEXT: v_writelane_b32 v39, s98, 59
-; FLATSCR-NEXT: v_writelane_b32 v39, s99, 60
+; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0
+; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1
+; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2
+; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3
+; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4
+; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5
+; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6
+; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7
+; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8
+; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9
+; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10
+; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11
+; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12
+; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13
+; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14
+; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15
+; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16
+; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17
+; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18
+; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19
+; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20
+; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21
+; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22
+; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23
+; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24
+; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25
+; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26
+; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27
+; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28
+; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29
; FLATSCR-NEXT: s_addk_i32 s32, 0x100c
-; FLATSCR-NEXT: v_writelane_b32 v39, s100, 61
-; FLATSCR-NEXT: v_writelane_b32 v39, s101, 62
+; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30
+; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000
-; FLATSCR-NEXT: v_writelane_b32 v39, s102, 63
+; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32
; FLATSCR-NEXT: scratch_store_dword off, v0, s1
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@@ -2606,70 +1876,39 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s102, v39, 63
-; FLATSCR-NEXT: v_readlane_b32 s101, v39, 62
-; FLATSCR-NEXT: v_readlane_b32 s100, v39, 61
-; FLATSCR-NEXT: v_readlane_b32 s99, v39, 60
-; FLATSCR-NEXT: v_readlane_b32 s98, v39, 59
-; FLATSCR-NEXT: v_readlane_b32 s97, v39, 58
-; FLATSCR-NEXT: v_readlane_b32 s96, v39, 57
-; FLATSCR-NEXT: v_readlane_b32 s95, v39, 56
-; FLATSCR-NEXT: v_readlane_b32 s94, v39, 55
-; FLATSCR-NEXT: v_readlane_b32 s93, v39, 54
-; FLATSCR-NEXT: v_readlane_b32 s92, v39, 53
-; FLATSCR-NEXT: v_readlane_b32 s91, v39, 52
-; FLATSCR-NEXT: v_readlane_b32 s90, v39, 51
-; FLATSCR-NEXT: v_readlane_b32 s89, v39, 50
-; FLATSCR-NEXT: v_readlane_b32 s88, v39, 49
-; FLATSCR-NEXT: v_readlane_b32 s87, v39, 48
-; FLATSCR-NEXT: v_readlane_b32 s86, v39, 47
-; FLATSCR-NEXT: v_readlane_b32 s85, v39, 46
-; FLATSCR-NEXT: v_readlane_b32 s84, v39, 45
-; FLATSCR-NEXT: v_readlane_b32 s83, v39, 44
-; FLATSCR-NEXT: v_readlane_b32 s82, v39, 43
-; FLATSCR-NEXT: v_readlane_b32 s81, v39, 42
-; FLATSCR-NEXT: v_readlane_b32 s80, v39, 41
-; FLATSCR-NEXT: v_readlane_b32 s79, v39, 40
-; FLATSCR-NEXT: v_readlane_b32 s78, v39, 39
-; FLATSCR-NEXT: v_readlane_b32 s77, v39, 38
-; FLATSCR-NEXT: v_readlane_b32 s76, v39, 37
-; FLATSCR-NEXT: v_readlane_b32 s75, v39, 36
-; FLATSCR-NEXT: v_readlane_b32 s74, v39, 35
-; FLATSCR-NEXT: v_readlane_b32 s73, v39, 34
-; FLATSCR-NEXT: v_readlane_b32 s72, v39, 33
-; FLATSCR-NEXT: v_readlane_b32 s71, v39, 32
-; FLATSCR-NEXT: v_readlane_b32 s70, v39, 31
-; FLATSCR-NEXT: v_readlane_b32 s69, v39, 30
-; FLATSCR-NEXT: v_readlane_b32 s68, v39, 29
-; FLATSCR-NEXT: v_readlane_b32 s67, v39, 28
-; FLATSCR-NEXT: v_readlane_b32 s66, v39, 27
-; FLATSCR-NEXT: v_readlane_b32 s65, v39, 26
-; FLATSCR-NEXT: v_readlane_b32 s64, v39, 25
-; FLATSCR-NEXT: v_readlane_b32 s63, v39, 24
-; FLATSCR-NEXT: v_readlane_b32 s62, v39, 23
-; FLATSCR-NEXT: v_readlane_b32 s61, v39, 22
-; FLATSCR-NEXT: v_readlane_b32 s60, v39, 21
-; FLATSCR-NEXT: v_readlane_b32 s59, v39, 20
-; FLATSCR-NEXT: v_readlane_b32 s58, v39, 19
-; FLATSCR-NEXT: v_readlane_b32 s57, v39, 18
-; FLATSCR-NEXT: v_readlane_b32 s56, v39, 17
-; FLATSCR-NEXT: v_readlane_b32 s55, v39, 16
-; FLATSCR-NEXT: v_readlane_b32 s54, v39, 15
-; FLATSCR-NEXT: v_readlane_b32 s53, v39, 14
-; FLATSCR-NEXT: v_readlane_b32 s52, v39, 13
-; FLATSCR-NEXT: v_readlane_b32 s51, v39, 12
-; FLATSCR-NEXT: v_readlane_b32 s50, v39, 11
-; FLATSCR-NEXT: v_readlane_b32 s49, v39, 10
-; FLATSCR-NEXT: v_readlane_b32 s48, v39, 9
-; FLATSCR-NEXT: v_readlane_b32 s47, v39, 8
-; FLATSCR-NEXT: v_readlane_b32 s46, v39, 7
-; FLATSCR-NEXT: v_readlane_b32 s45, v39, 6
-; FLATSCR-NEXT: v_readlane_b32 s44, v39, 5
-; FLATSCR-NEXT: v_readlane_b32 s43, v39, 4
-; FLATSCR-NEXT: v_readlane_b32 s42, v39, 3
-; FLATSCR-NEXT: v_readlane_b32 s41, v39, 2
-; FLATSCR-NEXT: v_readlane_b32 s40, v39, 1
-; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32
+; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31
+; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30
+; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29
+; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28
+; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27
+; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26
+; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25
+; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24
+; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23
+; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22
+; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21
+; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20
+; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19
+; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18
+; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17
+; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16
+; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15
+; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14
+; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13
+; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12
+; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11
+; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10
+; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9
+; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8
+; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7
+; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6
+; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5
+; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4
+; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3
+; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2
+; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1
+; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004
diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
index a14d515688a8b..0a3bf35427e24 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
@@ -14,15 +14,13 @@ body: |
; CHECK-LABEL: name: def_csr_sgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47
+ ; CHECK-NEXT: liveins: $sgpr46, $sgpr47
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
- ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
- ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 2, $vgpr0
- ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 3, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 0, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 1, $vgpr0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 4d9c85ef99dcd..4c2e3f426d29f 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1321,19 +1321,19 @@ bb:
define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspace(3) %arg) {
; CI-LABEL: ds_read_call_read:
; CI: ; %bb.0:
-; CI-NEXT: s_getpc_b64 s[40:41]
-; CI-NEXT: s_mov_b32 s40, s0
-; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0
+; CI-NEXT: s_getpc_b64 s[64:65]
+; CI-NEXT: s_mov_b32 s64, s0
+; CI-NEXT: s_load_dwordx4 s[64:67], s[64:65], 0x0
; CI-NEXT: s_mov_b32 s14, s10
; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_mov_b32 s12, s8
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_add_u32 s40, s40, s11
+; CI-NEXT: s_add_u32 s64, s64, s11
; CI-NEXT: s_mov_b64 s[10:11], s[6:7]
-; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0
+; CI-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x0
; CI-NEXT: s_load_dword s6, s[4:5], 0x2
-; CI-NEXT: s_addc_u32 s41, s41, 0
+; CI-NEXT: s_addc_u32 s65, s65, 0
; CI-NEXT: s_add_u32 s8, s4, 12
; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CI-NEXT: s_mov_b32 s13, s9
@@ -1345,36 +1345,36 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: s_mov_b64 s[4:5], s[0:1]
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
-; CI-NEXT: s_mov_b64 s[0:1], s[40:41]
+; CI-NEXT: s_mov_b64 s[0:1], s[64:65]
; CI-NEXT: s_mov_b32 s17, void_func_void@abs32@hi
; CI-NEXT: s_mov_b32 s16, void_func_void@abs32@lo
; CI-NEXT: v_or_b32_e32 v31, v0, v2
-; CI-NEXT: s_mov_b64 s[2:3], s[42:43]
+; CI-NEXT: s_mov_b64 s[2:3], s[66:67]
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: s_mov_b32 s39, 0xf000
-; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s51, 0xf000
+; CI-NEXT: s_mov_b32 s50, -1
; CI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CI-NEXT: ds_read_b32 v0, v40 offset:4
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, v41, v0
-; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0
+; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: ds_read_call_read:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_getpc_b64 s[36:37]
-; GFX9-NEXT: s_mov_b32 s36, s0
-; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0
+; GFX9-NEXT: s_getpc_b64 s[48:49]
+; GFX9-NEXT: s_mov_b32 s48, s0
+; GFX9-NEXT: s_load_dwordx4 s[48:51], s[48:49], 0x0
; GFX9-NEXT: s_mov_b32 s14, s10
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_add_u32 s48, s48, s11
; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_add_u32 s8, s4, 12
; GFX9-NEXT: s_addc_u32 s9, s5, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1383,11 +1383,11 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_mov_b32 s17, void_func_void@abs32@hi
; GFX9-NEXT: s_mov_b32 s16, void_func_void@abs32@lo
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_mov_b32_e32 v40, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
index 8b02bdbb70b7b..f671ea5f10cd8 100644
--- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
@@ -28,63 +28,63 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: v_writelane_b32 v41, s35, 3
; CHECK-NEXT: v_writelane_b32 v41, s36, 4
; CHECK-NEXT: v_writelane_b32 v41, s37, 5
-; CHECK-NEXT: v_writelane_b32 v41, s38, 6
-; CHECK-NEXT: v_writelane_b32 v41, s39, 7
-; CHECK-NEXT: v_writelane_b32 v41, s40, 8
-; CHECK-NEXT: v_writelane_b32 v41, s41, 9
-; CHECK-NEXT: v_writelane_b32 v41, s42, 10
-; CHECK-NEXT: v_writelane_b32 v41, s43, 11
-; CHECK-NEXT: v_writelane_b32 v41, s44, 12
+; CHECK-NEXT: v_writelane_b32 v41, s46, 6
+; CHECK-NEXT: v_writelane_b32 v41, s47, 7
+; CHECK-NEXT: v_writelane_b32 v41, s48, 8
+; CHECK-NEXT: v_writelane_b32 v41, s49, 9
+; CHECK-NEXT: v_writelane_b32 v41, s50, 10
+; CHECK-NEXT: v_writelane_b32 v41, s51, 11
+; CHECK-NEXT: v_writelane_b32 v41, s52, 12
; CHECK-NEXT: s_addk_i32 s32, 0x400
-; CHECK-NEXT: v_writelane_b32 v41, s45, 13
-; CHECK-NEXT: v_writelane_b32 v41, s46, 14
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v41, s53, 13
+; CHECK-NEXT: v_writelane_b32 v41, s62, 14
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: .loc 1 49 9 prologue_end ; dummy:49:9
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12
-; CHECK-NEXT: v_writelane_b32 v41, s47, 15
-; CHECK-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: v_writelane_b32 v41, s63, 15
+; CHECK-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v40, v31
-; CHECK-NEXT: s_mov_b32 s42, s15
-; CHECK-NEXT: s_mov_b32 s43, s14
-; CHECK-NEXT: s_mov_b32 s44, s13
-; CHECK-NEXT: s_mov_b32 s45, s12
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47]
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s45
-; CHECK-NEXT: s_mov_b32 s13, s44
-; CHECK-NEXT: s_mov_b32 s14, s43
-; CHECK-NEXT: s_mov_b32 s15, s42
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63]
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- [$vgpr0_vgpr1+0]
; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: flat_store_dword v[0:1], v2
-; CHECK-NEXT: v_readlane_b32 s47, v41, 15
-; CHECK-NEXT: v_readlane_b32 s46, v41, 14
-; CHECK-NEXT: v_readlane_b32 s45, v41, 13
-; CHECK-NEXT: v_readlane_b32 s44, v41, 12
-; CHECK-NEXT: v_readlane_b32 s43, v41, 11
-; CHECK-NEXT: v_readlane_b32 s42, v41, 10
-; CHECK-NEXT: v_readlane_b32 s41, v41, 9
-; CHECK-NEXT: v_readlane_b32 s40, v41, 8
-; CHECK-NEXT: v_readlane_b32 s39, v41, 7
-; CHECK-NEXT: v_readlane_b32 s38, v41, 6
+; CHECK-NEXT: v_readlane_b32 s63, v41, 15
+; CHECK-NEXT: v_readlane_b32 s62, v41, 14
+; CHECK-NEXT: v_readlane_b32 s53, v41, 13
+; CHECK-NEXT: v_readlane_b32 s52, v41, 12
+; CHECK-NEXT: v_readlane_b32 s51, v41, 11
+; CHECK-NEXT: v_readlane_b32 s50, v41, 10
+; CHECK-NEXT: v_readlane_b32 s49, v41, 9
+; CHECK-NEXT: v_readlane_b32 s48, v41, 8
+; CHECK-NEXT: v_readlane_b32 s47, v41, 7
+; CHECK-NEXT: v_readlane_b32 s46, v41, 6
; CHECK-NEXT: v_readlane_b32 s37, v41, 5
; CHECK-NEXT: v_readlane_b32 s36, v41, 4
; CHECK-NEXT: v_readlane_b32 s35, v41, 3
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
index 0714def30053d..4dd03a17f7caa 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
@@ -142,9 +142,8 @@ body: |
; GFX1100-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22
; GFX1100-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; GFX1100-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; GFX1100-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
- ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 $sgpr32
- ; GFX1100-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, -8, implicit-def $scc
+ ; GFX1100-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
+ ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38
; GFX1100-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
@@ -168,9 +167,8 @@ body: |
; GFX1200-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22
; GFX1200-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; GFX1200-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; GFX1200-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
- ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 $sgpr32
- ; GFX1200-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, -8, implicit-def $scc
+ ; GFX1200-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
+ ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38
; GFX1200-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
@@ -706,8 +704,9 @@ body: |
; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 24, 64, $sgpr32, 0, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 24
+ ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -810,10 +809,10 @@ body: |
; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1100-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
- ; GFX1100-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
- ; GFX1100-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
- ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi
+ ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
+ ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
+ ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
+ ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
@@ -837,10 +836,10 @@ body: |
; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1200-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
- ; GFX1200-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
- ; GFX1200-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
- ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi
+ ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
+ ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
+ ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
+ ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
@@ -900,9 +899,9 @@ body: |
; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 68, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 64, $sgpr32, 0, implicit $exec
- ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec
+ ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
+ ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -1005,10 +1004,10 @@ body: |
; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1100-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
- ; GFX1100-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
- ; GFX1100-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
- ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi
+ ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
+ ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
+ ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
+ ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
@@ -1032,10 +1031,10 @@ body: |
; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1200-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
- ; GFX1200-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc
- ; GFX1200-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi
- ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi
+ ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
+ ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
+ ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
+ ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
index 32f255df82499..ff2fb986e7828 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
@@ -2060,9 +2060,9 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s29, s33
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 vcc, -1
+; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, vcc
+; GFX9-NEXT: s_mov_b64 exec, s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v2, s28
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48
; GFX9-NEXT: v_mov_b32_e32 v5, s27
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 22257d3eba7d6..3e84aa37fbcaa 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -336,14 +336,14 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: .set multi_call_use_use_stack.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr)
; GCN: .set multi_call_use_use_stack.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr)
-; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(44, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr)
+; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(52, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr)
; GCN: .set multi_call_use_use_stack.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN: .set multi_call_use_use_stack.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc)
; GCN: .set multi_call_use_use_stack.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch)
; GCN: .set multi_call_use_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack)
; GCN: .set multi_call_use_use_stack.has_recursion, or(0, use_stack0.has_recursion, use_stack1.has_recursion)
; GCN: .set multi_call_use_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call)
-; GCN: TotalNumSgprs: 50
+; GCN: TotalNumSgprs: 58
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
@@ -357,7 +357,7 @@ declare void @external() #0
; GCN-LABEL: {{^}}multi_call_with_external:
; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set multi_call_with_external.numbered_sgpr, max(44, amdgpu.max_num_sgpr)
+; GCN: .set multi_call_with_external.numbered_sgpr, max(52, amdgpu.max_num_sgpr)
; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN: .set multi_call_with_external.uses_vcc, 1
; GCN: .set multi_call_with_external.uses_flat_scratch, 1
@@ -377,7 +377,7 @@ define amdgpu_kernel void @multi_call_with_external() #0 {
; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates:
; GCN: .set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(46, amdgpu.max_num_sgpr)
+; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(54, amdgpu.max_num_sgpr)
; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size))
; GCN: .set multi_call_with_external_and_duplicates.uses_vcc, 1
; GCN: .set multi_call_with_external_and_duplicates.uses_flat_scratch, 1
@@ -594,7 +594,7 @@ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
-; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(45, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
+; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(53, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
index 1ad365df2e8a8..be12d4be59106 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
@@ -40,12 +40,44 @@ define amdgpu_gfx void @gfx_func() {
; SDAG-NEXT: v_writelane_b32 v40, s28, 24
; SDAG-NEXT: v_writelane_b32 v40, s29, 25
; SDAG-NEXT: v_writelane_b32 v40, s30, 26
+; SDAG-NEXT: v_writelane_b32 v40, s31, 27
+; SDAG-NEXT: v_writelane_b32 v40, s70, 28
+; SDAG-NEXT: v_writelane_b32 v40, s71, 29
+; SDAG-NEXT: v_writelane_b32 v40, s72, 30
+; SDAG-NEXT: v_writelane_b32 v40, s73, 31
+; SDAG-NEXT: v_writelane_b32 v40, s74, 32
+; SDAG-NEXT: v_writelane_b32 v40, s75, 33
+; SDAG-NEXT: v_writelane_b32 v40, s76, 34
+; SDAG-NEXT: v_writelane_b32 v40, s77, 35
+; SDAG-NEXT: v_writelane_b32 v40, s86, 36
+; SDAG-NEXT: v_writelane_b32 v40, s87, 37
+; SDAG-NEXT: v_writelane_b32 v40, s88, 38
+; SDAG-NEXT: v_writelane_b32 v40, s89, 39
+; SDAG-NEXT: v_writelane_b32 v40, s90, 40
+; SDAG-NEXT: v_writelane_b32 v40, s91, 41
+; SDAG-NEXT: v_writelane_b32 v40, s92, 42
; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi
; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo
; SDAG-NEXT: s_mov_b64 s[8:9], 0
; SDAG-NEXT: s_addk_i32 s32, 0x400
-; SDAG-NEXT: v_writelane_b32 v40, s31, 27
+; SDAG-NEXT: v_writelane_b32 v40, s93, 43
; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; SDAG-NEXT: v_readlane_b32 s93, v40, 43
+; SDAG-NEXT: v_readlane_b32 s92, v40, 42
+; SDAG-NEXT: v_readlane_b32 s91, v40, 41
+; SDAG-NEXT: v_readlane_b32 s90, v40, 40
+; SDAG-NEXT: v_readlane_b32 s89, v40, 39
+; SDAG-NEXT: v_readlane_b32 s88, v40, 38
+; SDAG-NEXT: v_readlane_b32 s87, v40, 37
+; SDAG-NEXT: v_readlane_b32 s86, v40, 36
+; SDAG-NEXT: v_readlane_b32 s77, v40, 35
+; SDAG-NEXT: v_readlane_b32 s76, v40, 34
+; SDAG-NEXT: v_readlane_b32 s75, v40, 33
+; SDAG-NEXT: v_readlane_b32 s74, v40, 32
+; SDAG-NEXT: v_readlane_b32 s73, v40, 31
+; SDAG-NEXT: v_readlane_b32 s72, v40, 30
+; SDAG-NEXT: v_readlane_b32 s71, v40, 29
+; SDAG-NEXT: v_readlane_b32 s70, v40, 28
; SDAG-NEXT: v_readlane_b32 s31, v40, 27
; SDAG-NEXT: v_readlane_b32 s30, v40, 26
; SDAG-NEXT: v_readlane_b32 s29, v40, 25
@@ -117,12 +149,44 @@ define amdgpu_gfx void @gfx_func() {
; GISEL-NEXT: v_writelane_b32 v40, s28, 24
; GISEL-NEXT: v_writelane_b32 v40, s29, 25
; GISEL-NEXT: v_writelane_b32 v40, s30, 26
+; GISEL-NEXT: v_writelane_b32 v40, s31, 27
+; GISEL-NEXT: v_writelane_b32 v40, s70, 28
+; GISEL-NEXT: v_writelane_b32 v40, s71, 29
+; GISEL-NEXT: v_writelane_b32 v40, s72, 30
+; GISEL-NEXT: v_writelane_b32 v40, s73, 31
+; GISEL-NEXT: v_writelane_b32 v40, s74, 32
+; GISEL-NEXT: v_writelane_b32 v40, s75, 33
+; GISEL-NEXT: v_writelane_b32 v40, s76, 34
+; GISEL-NEXT: v_writelane_b32 v40, s77, 35
+; GISEL-NEXT: v_writelane_b32 v40, s86, 36
+; GISEL-NEXT: v_writelane_b32 v40, s87, 37
+; GISEL-NEXT: v_writelane_b32 v40, s88, 38
+; GISEL-NEXT: v_writelane_b32 v40, s89, 39
+; GISEL-NEXT: v_writelane_b32 v40, s90, 40
+; GISEL-NEXT: v_writelane_b32 v40, s91, 41
+; GISEL-NEXT: v_writelane_b32 v40, s92, 42
; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo
; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi
; GISEL-NEXT: s_mov_b64 s[8:9], 0
; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s31, 27
+; GISEL-NEXT: v_writelane_b32 v40, s93, 43
; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GISEL-NEXT: v_readlane_b32 s93, v40, 43
+; GISEL-NEXT: v_readlane_b32 s92, v40, 42
+; GISEL-NEXT: v_readlane_b32 s91, v40, 41
+; GISEL-NEXT: v_readlane_b32 s90, v40, 40
+; GISEL-NEXT: v_readlane_b32 s89, v40, 39
+; GISEL-NEXT: v_readlane_b32 s88, v40, 38
+; GISEL-NEXT: v_readlane_b32 s87, v40, 37
+; GISEL-NEXT: v_readlane_b32 s86, v40, 36
+; GISEL-NEXT: v_readlane_b32 s77, v40, 35
+; GISEL-NEXT: v_readlane_b32 s76, v40, 34
+; GISEL-NEXT: v_readlane_b32 s75, v40, 33
+; GISEL-NEXT: v_readlane_b32 s74, v40, 32
+; GISEL-NEXT: v_readlane_b32 s73, v40, 31
+; GISEL-NEXT: v_readlane_b32 s72, v40, 30
+; GISEL-NEXT: v_readlane_b32 s71, v40, 29
+; GISEL-NEXT: v_readlane_b32 s70, v40, 28
; GISEL-NEXT: v_readlane_b32 s31, v40, 27
; GISEL-NEXT: v_readlane_b32 s30, v40, 26
; GISEL-NEXT: v_readlane_b32 s29, v40, 25
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 2e3ca34af4c74..ef230e4b877b0 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -9091,66 +9091,34 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s35, 3
; GFX9-NEXT: v_writelane_b32 v40, s36, 4
; GFX9-NEXT: v_writelane_b32 v40, s37, 5
-; GFX9-NEXT: v_writelane_b32 v40, s38, 6
-; GFX9-NEXT: v_writelane_b32 v40, s39, 7
-; GFX9-NEXT: v_writelane_b32 v40, s40, 8
-; GFX9-NEXT: v_writelane_b32 v40, s41, 9
-; GFX9-NEXT: v_writelane_b32 v40, s42, 10
-; GFX9-NEXT: v_writelane_b32 v40, s43, 11
-; GFX9-NEXT: v_writelane_b32 v40, s44, 12
-; GFX9-NEXT: v_writelane_b32 v40, s45, 13
-; GFX9-NEXT: v_writelane_b32 v40, s46, 14
-; GFX9-NEXT: v_writelane_b32 v40, s47, 15
-; GFX9-NEXT: v_writelane_b32 v40, s48, 16
-; GFX9-NEXT: v_writelane_b32 v40, s49, 17
-; GFX9-NEXT: v_writelane_b32 v40, s50, 18
-; GFX9-NEXT: v_writelane_b32 v40, s51, 19
-; GFX9-NEXT: v_writelane_b32 v40, s52, 20
-; GFX9-NEXT: v_writelane_b32 v40, s53, 21
-; GFX9-NEXT: v_writelane_b32 v40, s54, 22
-; GFX9-NEXT: v_writelane_b32 v40, s55, 23
-; GFX9-NEXT: v_writelane_b32 v40, s56, 24
-; GFX9-NEXT: v_writelane_b32 v40, s57, 25
-; GFX9-NEXT: v_writelane_b32 v40, s58, 26
-; GFX9-NEXT: v_writelane_b32 v40, s59, 27
-; GFX9-NEXT: v_writelane_b32 v40, s60, 28
-; GFX9-NEXT: v_writelane_b32 v40, s61, 29
+; GFX9-NEXT: v_writelane_b32 v40, s46, 6
+; GFX9-NEXT: v_writelane_b32 v40, s47, 7
+; GFX9-NEXT: v_writelane_b32 v40, s48, 8
+; GFX9-NEXT: v_writelane_b32 v40, s49, 9
+; GFX9-NEXT: v_writelane_b32 v40, s50, 10
+; GFX9-NEXT: v_writelane_b32 v40, s51, 11
+; GFX9-NEXT: v_writelane_b32 v40, s52, 12
+; GFX9-NEXT: v_writelane_b32 v40, s53, 13
; GFX9-NEXT: s_addk_i32 s32, 0x800
-; GFX9-NEXT: v_writelane_b32 v40, s62, 30
+; GFX9-NEXT: v_writelane_b32 v40, s62, 14
; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi
; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo
-; GFX9-NEXT: v_writelane_b32 v40, s63, 31
+; GFX9-NEXT: v_writelane_b32 v40, s63, 15
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s63, v40, 31
-; GFX9-NEXT: v_readlane_b32 s62, v40, 30
-; GFX9-NEXT: v_readlane_b32 s61, v40, 29
-; GFX9-NEXT: v_readlane_b32 s60, v40, 28
-; GFX9-NEXT: v_readlane_b32 s59, v40, 27
-; GFX9-NEXT: v_readlane_b32 s58, v40, 26
-; GFX9-NEXT: v_readlane_b32 s57, v40, 25
-; GFX9-NEXT: v_readlane_b32 s56, v40, 24
-; GFX9-NEXT: v_readlane_b32 s55, v40, 23
-; GFX9-NEXT: v_readlane_b32 s54, v40, 22
-; GFX9-NEXT: v_readlane_b32 s53, v40, 21
-; GFX9-NEXT: v_readlane_b32 s52, v40, 20
-; GFX9-NEXT: v_readlane_b32 s51, v40, 19
-; GFX9-NEXT: v_readlane_b32 s50, v40, 18
-; GFX9-NEXT: v_readlane_b32 s49, v40, 17
-; GFX9-NEXT: v_readlane_b32 s48, v40, 16
-; GFX9-NEXT: v_readlane_b32 s47, v40, 15
-; GFX9-NEXT: v_readlane_b32 s46, v40, 14
-; GFX9-NEXT: v_readlane_b32 s45, v40, 13
-; GFX9-NEXT: v_readlane_b32 s44, v40, 12
-; GFX9-NEXT: v_readlane_b32 s43, v40, 11
-; GFX9-NEXT: v_readlane_b32 s42, v40, 10
-; GFX9-NEXT: v_readlane_b32 s41, v40, 9
-; GFX9-NEXT: v_readlane_b32 s40, v40, 8
-; GFX9-NEXT: v_readlane_b32 s39, v40, 7
-; GFX9-NEXT: v_readlane_b32 s38, v40, 6
+; GFX9-NEXT: v_readlane_b32 s63, v40, 15
+; GFX9-NEXT: v_readlane_b32 s62, v40, 14
+; GFX9-NEXT: v_readlane_b32 s53, v40, 13
+; GFX9-NEXT: v_readlane_b32 s52, v40, 12
+; GFX9-NEXT: v_readlane_b32 s51, v40, 11
+; GFX9-NEXT: v_readlane_b32 s50, v40, 10
+; GFX9-NEXT: v_readlane_b32 s49, v40, 9
+; GFX9-NEXT: v_readlane_b32 s48, v40, 8
+; GFX9-NEXT: v_readlane_b32 s47, v40, 7
+; GFX9-NEXT: v_readlane_b32 s46, v40, 6
; GFX9-NEXT: v_readlane_b32 s37, v40, 5
; GFX9-NEXT: v_readlane_b32 s36, v40, 4
; GFX9-NEXT: v_readlane_b32 s35, v40, 3
@@ -9191,59 +9159,27 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX10-NEXT: v_writelane_b32 v40, s35, 3
; GFX10-NEXT: v_writelane_b32 v40, s36, 4
; GFX10-NEXT: v_writelane_b32 v40, s37, 5
-; GFX10-NEXT: v_writelane_b32 v40, s38, 6
-; GFX10-NEXT: v_writelane_b32 v40, s39, 7
-; GFX10-NEXT: v_writelane_b32 v40, s40, 8
-; GFX10-NEXT: v_writelane_b32 v40, s41, 9
-; GFX10-NEXT: v_writelane_b32 v40, s42, 10
-; GFX10-NEXT: v_writelane_b32 v40, s43, 11
-; GFX10-NEXT: v_writelane_b32 v40, s44, 12
-; GFX10-NEXT: v_writelane_b32 v40, s45, 13
-; GFX10-NEXT: v_writelane_b32 v40, s46, 14
-; GFX10-NEXT: v_writelane_b32 v40, s47, 15
-; GFX10-NEXT: v_writelane_b32 v40, s48, 16
-; GFX10-NEXT: v_writelane_b32 v40, s49, 17
-; GFX10-NEXT: v_writelane_b32 v40, s50, 18
-; GFX10-NEXT: v_writelane_b32 v40, s51, 19
-; GFX10-NEXT: v_writelane_b32 v40, s52, 20
-; GFX10-NEXT: v_writelane_b32 v40, s53, 21
-; GFX10-NEXT: v_writelane_b32 v40, s54, 22
-; GFX10-NEXT: v_writelane_b32 v40, s55, 23
-; GFX10-NEXT: v_writelane_b32 v40, s56, 24
-; GFX10-NEXT: v_writelane_b32 v40, s57, 25
-; GFX10-NEXT: v_writelane_b32 v40, s58, 26
-; GFX10-NEXT: v_writelane_b32 v40, s59, 27
-; GFX10-NEXT: v_writelane_b32 v40, s60, 28
-; GFX10-NEXT: v_writelane_b32 v40, s61, 29
-; GFX10-NEXT: v_writelane_b32 v40, s62, 30
-; GFX10-NEXT: v_writelane_b32 v40, s63, 31
+; GFX10-NEXT: v_writelane_b32 v40, s46, 6
+; GFX10-NEXT: v_writelane_b32 v40, s47, 7
+; GFX10-NEXT: v_writelane_b32 v40, s48, 8
+; GFX10-NEXT: v_writelane_b32 v40, s49, 9
+; GFX10-NEXT: v_writelane_b32 v40, s50, 10
+; GFX10-NEXT: v_writelane_b32 v40, s51, 11
+; GFX10-NEXT: v_writelane_b32 v40, s52, 12
+; GFX10-NEXT: v_writelane_b32 v40, s53, 13
+; GFX10-NEXT: v_writelane_b32 v40, s62, 14
+; GFX10-NEXT: v_writelane_b32 v40, s63, 15
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s63, v40, 31
-; GFX10-NEXT: v_readlane_b32 s62, v40, 30
-; GFX10-NEXT: v_readlane_b32 s61, v40, 29
-; GFX10-NEXT: v_readlane_b32 s60, v40, 28
-; GFX10-NEXT: v_readlane_b32 s59, v40, 27
-; GFX10-NEXT: v_readlane_b32 s58, v40, 26
-; GFX10-NEXT: v_readlane_b32 s57, v40, 25
-; GFX10-NEXT: v_readlane_b32 s56, v40, 24
-; GFX10-NEXT: v_readlane_b32 s55, v40, 23
-; GFX10-NEXT: v_readlane_b32 s54, v40, 22
-; GFX10-NEXT: v_readlane_b32 s53, v40, 21
-; GFX10-NEXT: v_readlane_b32 s52, v40, 20
-; GFX10-NEXT: v_readlane_b32 s51, v40, 19
-; GFX10-NEXT: v_readlane_b32 s50, v40, 18
-; GFX10-NEXT: v_readlane_b32 s49, v40, 17
-; GFX10-NEXT: v_readlane_b32 s48, v40, 16
-; GFX10-NEXT: v_readlane_b32 s47, v40, 15
-; GFX10-NEXT: v_readlane_b32 s46, v40, 14
-; GFX10-NEXT: v_readlane_b32 s45, v40, 13
-; GFX10-NEXT: v_readlane_b32 s44, v40, 12
-; GFX10-NEXT: v_readlane_b32 s43, v40, 11
-; GFX10-NEXT: v_readlane_b32 s42, v40, 10
-; GFX10-NEXT: v_readlane_b32 s41, v40, 9
-; GFX10-NEXT: v_readlane_b32 s40, v40, 8
-; GFX10-NEXT: v_readlane_b32 s39, v40, 7
-; GFX10-NEXT: v_readlane_b32 s38, v40, 6
+; GFX10-NEXT: v_readlane_b32 s63, v40, 15
+; GFX10-NEXT: v_readlane_b32 s62, v40, 14
+; GFX10-NEXT: v_readlane_b32 s53, v40, 13
+; GFX10-NEXT: v_readlane_b32 s52, v40, 12
+; GFX10-NEXT: v_readlane_b32 s51, v40, 11
+; GFX10-NEXT: v_readlane_b32 s50, v40, 10
+; GFX10-NEXT: v_readlane_b32 s49, v40, 9
+; GFX10-NEXT: v_readlane_b32 s48, v40, 8
+; GFX10-NEXT: v_readlane_b32 s47, v40, 7
+; GFX10-NEXT: v_readlane_b32 s46, v40, 6
; GFX10-NEXT: v_readlane_b32 s37, v40, 5
; GFX10-NEXT: v_readlane_b32 s36, v40, 4
; GFX10-NEXT: v_readlane_b32 s35, v40, 3
@@ -9279,61 +9215,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX11-NEXT: v_writelane_b32 v40, s35, 3
; GFX11-NEXT: v_writelane_b32 v40, s36, 4
; GFX11-NEXT: v_writelane_b32 v40, s37, 5
-; GFX11-NEXT: v_writelane_b32 v40, s38, 6
-; GFX11-NEXT: v_writelane_b32 v40, s39, 7
-; GFX11-NEXT: v_writelane_b32 v40, s40, 8
-; GFX11-NEXT: v_writelane_b32 v40, s41, 9
-; GFX11-NEXT: v_writelane_b32 v40, s42, 10
-; GFX11-NEXT: v_writelane_b32 v40, s43, 11
-; GFX11-NEXT: v_writelane_b32 v40, s44, 12
-; GFX11-NEXT: v_writelane_b32 v40, s45, 13
-; GFX11-NEXT: v_writelane_b32 v40, s46, 14
-; GFX11-NEXT: v_writelane_b32 v40, s47, 15
-; GFX11-NEXT: v_writelane_b32 v40, s48, 16
-; GFX11-NEXT: v_writelane_b32 v40, s49, 17
-; GFX11-NEXT: v_writelane_b32 v40, s50, 18
-; GFX11-NEXT: v_writelane_b32 v40, s51, 19
-; GFX11-NEXT: v_writelane_b32 v40, s52, 20
-; GFX11-NEXT: v_writelane_b32 v40, s53, 21
-; GFX11-NEXT: v_writelane_b32 v40, s54, 22
-; GFX11-NEXT: v_writelane_b32 v40, s55, 23
-; GFX11-NEXT: v_writelane_b32 v40, s56, 24
-; GFX11-NEXT: v_writelane_b32 v40, s57, 25
-; GFX11-NEXT: v_writelane_b32 v40, s58, 26
-; GFX11-NEXT: v_writelane_b32 v40, s59, 27
-; GFX11-NEXT: v_writelane_b32 v40, s60, 28
-; GFX11-NEXT: v_writelane_b32 v40, s61, 29
-; GFX11-NEXT: v_writelane_b32 v40, s62, 30
-; GFX11-NEXT: v_writelane_b32 v40, s63, 31
+; GFX11-NEXT: v_writelane_b32 v40, s46, 6
+; GFX11-NEXT: v_writelane_b32 v40, s47, 7
+; GFX11-NEXT: v_writelane_b32 v40, s48, 8
+; GFX11-NEXT: v_writelane_b32 v40, s49, 9
+; GFX11-NEXT: v_writelane_b32 v40, s50, 10
+; GFX11-NEXT: v_writelane_b32 v40, s51, 11
+; GFX11-NEXT: v_writelane_b32 v40, s52, 12
+; GFX11-NEXT: v_writelane_b32 v40, s53, 13
+; GFX11-NEXT: v_writelane_b32 v40, s62, 14
+; GFX11-NEXT: v_writelane_b32 v40, s63, 15
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: v_readlane_b32 s63, v40, 31
-; GFX11-NEXT: v_readlane_b32 s62, v40, 30
-; GFX11-NEXT: v_readlane_b32 s61, v40, 29
-; GFX11-NEXT: v_readlane_b32 s60, v40, 28
-; GFX11-NEXT: v_readlane_b32 s59, v40, 27
-; GFX11-NEXT: v_readlane_b32 s58, v40, 26
-; GFX11-NEXT: v_readlane_b32 s57, v40, 25
-; GFX11-NEXT: v_readlane_b32 s56, v40, 24
-; GFX11-NEXT: v_readlane_b32 s55, v40, 23
-; GFX11-NEXT: v_readlane_b32 s54, v40, 22
-; GFX11-NEXT: v_readlane_b32 s53, v40, 21
-; GFX11-NEXT: v_readlane_b32 s52, v40, 20
-; GFX11-NEXT: v_readlane_b32 s51, v40, 19
-; GFX11-NEXT: v_readlane_b32 s50, v40, 18
-; GFX11-NEXT: v_readlane_b32 s49, v40, 17
-; GFX11-NEXT: v_readlane_b32 s48, v40, 16
-; GFX11-NEXT: v_readlane_b32 s47, v40, 15
-; GFX11-NEXT: v_readlane_b32 s46, v40, 14
-; GFX11-NEXT: v_readlane_b32 s45, v40, 13
-; GFX11-NEXT: v_readlane_b32 s44, v40, 12
-; GFX11-NEXT: v_readlane_b32 s43, v40, 11
-; GFX11-NEXT: v_readlane_b32 s42, v40, 10
-; GFX11-NEXT: v_readlane_b32 s41, v40, 9
-; GFX11-NEXT: v_readlane_b32 s40, v40, 8
-; GFX11-NEXT: v_readlane_b32 s39, v40, 7
-; GFX11-NEXT: v_readlane_b32 s38, v40, 6
+; GFX11-NEXT: v_readlane_b32 s63, v40, 15
+; GFX11-NEXT: v_readlane_b32 s62, v40, 14
+; GFX11-NEXT: v_readlane_b32 s53, v40, 13
+; GFX11-NEXT: v_readlane_b32 s52, v40, 12
+; GFX11-NEXT: v_readlane_b32 s51, v40, 11
+; GFX11-NEXT: v_readlane_b32 s50, v40, 10
+; GFX11-NEXT: v_readlane_b32 s49, v40, 9
+; GFX11-NEXT: v_readlane_b32 s48, v40, 8
+; GFX11-NEXT: v_readlane_b32 s47, v40, 7
+; GFX11-NEXT: v_readlane_b32 s46, v40, 6
; GFX11-NEXT: v_readlane_b32 s37, v40, 5
; GFX11-NEXT: v_readlane_b32 s36, v40, 4
; GFX11-NEXT: v_readlane_b32 s35, v40, 3
@@ -9369,61 +9273,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 9
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 10
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 11
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 13
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 14
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 15
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 15
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 14
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 13
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 12
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 11
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 10
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 6
; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5
; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4
; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 15be44a335a1d..3c85914536f28 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -365,12 +365,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -388,8 +388,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -437,12 +437,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -458,9 +458,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -504,13 +504,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -526,8 +526,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -571,13 +571,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -593,8 +593,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -735,19 +735,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -760,11 +760,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -773,7 +773,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -785,12 +785,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -806,9 +806,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -867,13 +867,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -889,8 +889,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -944,13 +944,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -966,8 +966,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1585,12 +1585,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1608,8 +1608,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1657,12 +1657,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1678,9 +1678,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1724,13 +1724,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1746,8 +1746,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1791,13 +1791,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1813,8 +1813,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1955,19 +1955,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -1980,11 +1980,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1993,7 +1993,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2005,12 +2005,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2026,9 +2026,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2087,13 +2087,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2109,8 +2109,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2164,13 +2164,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2186,8 +2186,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2865,12 +2865,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -2888,8 +2888,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -2937,12 +2937,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -2958,9 +2958,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3004,13 +3004,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -3026,8 +3026,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3071,13 +3071,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -3093,8 +3093,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3235,19 +3235,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -3260,11 +3260,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -3273,7 +3273,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3285,12 +3285,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -3306,9 +3306,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3367,13 +3367,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3444,13 +3444,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -3466,8 +3466,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3641,12 +3641,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -3664,8 +3664,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -3713,12 +3713,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -3734,9 +3734,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3780,13 +3780,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -3802,8 +3802,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3847,13 +3847,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -3869,8 +3869,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4011,19 +4011,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -4036,11 +4036,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -4049,7 +4049,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -4061,12 +4061,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -4082,9 +4082,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4143,13 +4143,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -4165,8 +4165,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4220,13 +4220,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -4242,8 +4242,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4920,12 +4920,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -4943,8 +4943,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -4992,12 +4992,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5013,9 +5013,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5059,13 +5059,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5081,8 +5081,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5126,16 +5126,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
-; GFX1032-NEXT: s_mov_b32 s12, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s13, s9
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_mov_b32 s12, s8
+; GFX1032-NEXT: s_add_u32 s8, s34, 44
+; GFX1032-NEXT: s_mov_b32 s13, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
; GFX1032-NEXT: s_getpc_b64 s[4:5]
; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
@@ -5148,8 +5148,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5316,19 +5316,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -5341,11 +5341,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -5354,7 +5354,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -5366,12 +5366,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -5387,9 +5387,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5448,13 +5448,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -5470,8 +5470,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5525,13 +5525,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -5547,8 +5547,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5747,14 +5747,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
@@ -5763,16 +5763,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX7LESS-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
; GFX7LESS-NEXT: v_or_b32_e32 v4, v0, v4
@@ -5785,11 +5785,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -5802,64 +5802,64 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-NEXT: .LBB9_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_cbranch_execz .LBB9_3
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s42, s9
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s50, s9
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v3, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -5872,68 +5872,68 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v3, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-NEXT: .LBB9_3:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[10:11], exec
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -5950,69 +5950,69 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-NEXT: .LBB9_3:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: s_bcnt1_i32_b32 s0, s9
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6029,37 +6029,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-NEXT: .LBB9_3:
; GFX1032-NEXT: s_endpgm
@@ -6070,7 +6070,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_mov_b64 s[10:11], exec
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6079,16 +6079,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-NEXT: ; %bb.1:
; GFX1164-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -6111,18 +6111,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -6130,8 +6130,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-NEXT: .LBB9_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6142,24 +6142,24 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_mov_b32 s8, exec_lo
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -6179,25 +6179,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-NEXT: .LBB9_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6206,14 +6206,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
@@ -6222,16 +6222,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX7LESS-DPP-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v4, v0, v4
@@ -6244,11 +6244,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -6261,64 +6261,64 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-DPP-NEXT: .LBB9_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX9-DPP-NEXT: ; %bb.1:
; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -6331,68 +6331,68 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-DPP-NEXT: .LBB9_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], exec
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6409,69 +6409,69 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-DPP-NEXT: .LBB9_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, s9
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6488,37 +6488,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-DPP-NEXT: .LBB9_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -6529,7 +6529,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], exec
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6538,16 +6538,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -6570,18 +6570,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -6589,8 +6589,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-DPP-NEXT: .LBB9_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6601,24 +6601,24 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s8, exec_lo
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -6638,25 +6638,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-DPP-NEXT: .LBB9_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6669,19 +6669,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -6692,15 +6692,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -6725,21 +6725,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -6752,44 +6752,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4
; GFX7LESS-NEXT: .LBB10_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -6798,17 +6798,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6833,11 +6833,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB10_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -6848,53 +6848,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB10_4
; GFX9-NEXT: .LBB10_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -6903,17 +6903,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6938,11 +6938,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB10_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -6952,55 +6952,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_4
; GFX1064-NEXT: .LBB10_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7009,17 +7009,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7038,16 +7038,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB10_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -7057,37 +7057,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB10_4
; GFX1032-NEXT: .LBB10_5:
; GFX1032-NEXT: s_endpgm
@@ -7095,11 +7095,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7107,15 +7107,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -7143,11 +7143,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB10_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -7164,18 +7164,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7183,8 +7183,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_4
; GFX1164-NEXT: .LBB10_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7193,7 +7193,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -7202,9 +7202,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -7213,7 +7213,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -7233,17 +7233,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB10_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -7258,25 +7258,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB10_4
; GFX1132-NEXT: .LBB10_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7285,22 +7285,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -7311,30 +7311,30 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -7347,44 +7347,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7393,17 +7393,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7450,74 +7450,74 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[44:45]
+; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[52:53]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-DPP-NEXT: .LBB10_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7526,17 +7526,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7581,10 +7581,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -7594,55 +7594,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7651,17 +7651,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7696,14 +7696,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -7713,37 +7713,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -7751,11 +7751,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7763,15 +7763,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -7825,10 +7825,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7845,18 +7845,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7864,8 +7864,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7874,7 +7874,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -7883,9 +7883,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -7894,7 +7894,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -7936,14 +7936,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7958,25 +7958,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8508,12 +8508,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -8531,8 +8531,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -8585,12 +8585,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -8606,9 +8606,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8655,13 +8655,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -8677,8 +8677,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8725,13 +8725,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -8747,8 +8747,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8922,19 +8922,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -8947,11 +8947,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -8962,7 +8962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -8975,12 +8975,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -8996,9 +8996,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9074,13 +9074,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -9096,8 +9096,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9163,13 +9163,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -9185,8 +9185,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9941,12 +9941,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -9964,8 +9964,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -10018,12 +10018,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -10039,9 +10039,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10088,13 +10088,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -10110,8 +10110,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10158,13 +10158,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -10180,8 +10180,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10355,19 +10355,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -10380,11 +10380,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -10395,7 +10395,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -10408,12 +10408,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -10429,9 +10429,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10507,13 +10507,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -10529,8 +10529,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10596,13 +10596,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -10618,8 +10618,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10856,12 +10856,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -10879,8 +10879,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -10933,12 +10933,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -10954,9 +10954,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11003,13 +11003,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -11025,8 +11025,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11073,13 +11073,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -11095,8 +11095,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11270,19 +11270,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -11295,11 +11295,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -11310,7 +11310,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -11323,12 +11323,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -11344,9 +11344,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11422,13 +11422,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -11444,8 +11444,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11511,13 +11511,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -11533,8 +11533,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11771,13 +11771,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX7LESS-NEXT: s_mov_b32 s1, 0x43300000
@@ -11791,15 +11791,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -11811,11 +11811,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -11828,40 +11828,40 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-NEXT: .LBB16_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX9-NEXT: v_mov_b32_e32 v4, 0xc3300000
; GFX9-NEXT: s_mov_b32 s1, 0x43300000
@@ -11874,20 +11874,20 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB16_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -11900,48 +11900,48 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-NEXT: .LBB16_3:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1064-NEXT: s_mov_b32 s1, 0x43300000
; GFX1064-NEXT: s_movk_i32 s32, 0x800
@@ -11953,19 +11953,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
-; GFX1064-NEXT: s_mov_b32 s42, s9
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s50, s9
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-NEXT: v_mov_b32_e32 v1, s0
@@ -11978,53 +11978,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-NEXT: .LBB16_3:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12033,18 +12033,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
-; GFX1032-NEXT: s_mov_b32 s42, s9
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s50, s9
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-NEXT: v_mov_b32_e32 v1, s0
@@ -12057,44 +12057,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-NEXT: .LBB16_3:
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000
@@ -12115,16 +12115,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: s_mov_b32 s33, s10
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-NEXT: v_mov_b32_e32 v1, s0
@@ -12145,18 +12145,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -12164,8 +12164,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-NEXT: .LBB16_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12173,12 +12173,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
;
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_clause 0x1
; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12193,15 +12193,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
@@ -12219,25 +12219,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-NEXT: .LBB16_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12246,13 +12246,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX7LESS-DPP-NEXT: s_mov_b32 s1, 0x43300000
@@ -12266,15 +12266,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -12286,11 +12286,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -12303,40 +12303,40 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-DPP-NEXT: .LBB16_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000
; GFX9-DPP-NEXT: s_mov_b32 s1, 0x43300000
@@ -12349,20 +12349,20 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -12375,48 +12375,48 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-DPP-NEXT: .LBB16_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1064-DPP-NEXT: s_mov_b32 s1, 0x43300000
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
@@ -12428,19 +12428,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -12453,53 +12453,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-DPP-NEXT: .LBB16_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12508,18 +12508,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -12532,44 +12532,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-DPP-NEXT: .LBB16_3:
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
@@ -12590,16 +12590,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -12620,18 +12620,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -12639,8 +12639,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-DPP-NEXT: .LBB16_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12648,12 +12648,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
;
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_clause 0x1
; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12668,15 +12668,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -12694,25 +12694,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-DPP-NEXT: .LBB16_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12725,19 +12725,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -12748,15 +12748,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -12781,21 +12781,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -12808,44 +12808,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4
; GFX7LESS-NEXT: .LBB17_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -12854,17 +12854,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -12889,11 +12889,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB17_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -12904,53 +12904,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB17_4
; GFX9-NEXT: .LBB17_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -12959,17 +12959,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -12994,11 +12994,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB17_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -13008,55 +13008,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB17_4
; GFX1064-NEXT: .LBB17_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13065,17 +13065,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13094,16 +13094,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB17_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -13113,37 +13113,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB17_4
; GFX1032-NEXT: .LBB17_5:
; GFX1032-NEXT: s_endpgm
@@ -13151,11 +13151,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13163,15 +13163,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -13199,11 +13199,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB17_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13220,18 +13220,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -13239,8 +13239,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB17_4
; GFX1164-NEXT: .LBB17_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13249,7 +13249,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -13258,9 +13258,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -13269,7 +13269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -13289,17 +13289,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB17_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13314,25 +13314,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB17_4
; GFX1132-NEXT: .LBB17_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13341,22 +13341,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -13367,30 +13367,30 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -13403,44 +13403,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13449,17 +13449,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13506,74 +13506,74 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[44:45]
+; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[52:53]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX9-DPP-NEXT: .LBB17_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13582,17 +13582,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13637,10 +13637,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -13650,55 +13650,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1064-DPP-NEXT: .LBB17_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13707,17 +13707,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13752,14 +13752,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -13769,37 +13769,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1032-DPP-NEXT: .LBB17_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -13807,11 +13807,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13819,15 +13819,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -13881,10 +13881,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
@@ -13901,18 +13901,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -13920,8 +13920,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1164-DPP-NEXT: .LBB17_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13930,7 +13930,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -13939,9 +13939,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -13950,7 +13950,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -13992,14 +13992,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
@@ -14014,25 +14014,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1132-DPP-NEXT: .LBB17_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index a4410bb9ed2d0..cc9c310e5c059 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
@@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
@@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3135,13 +3135,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -3149,15 +3149,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3169,8 +3169,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
@@ -3178,8 +3178,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -3188,59 +3188,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-NEXT: .LBB6_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB6_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3253,36 +3253,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-NEXT: .LBB6_3:
; GFX9-NEXT: s_endpgm
@@ -3290,32 +3290,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
-; GFX1064-NEXT: s_mov_b32 s42, s9
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s50, s9
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-NEXT: v_mov_b32_e32 v1, s0
@@ -3328,38 +3328,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-NEXT: .LBB6_3:
; GFX1064-NEXT: s_endpgm
@@ -3367,31 +3367,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
-; GFX1032-NEXT: s_mov_b32 s42, s9
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s50, s9
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-NEXT: v_mov_b32_e32 v1, s0
@@ -3404,38 +3404,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-NEXT: .LBB6_3:
; GFX1032-NEXT: s_endpgm
@@ -3444,7 +3444,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164: ; %bb.0:
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -3452,16 +3452,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: s_mov_b32 s33, s10
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-NEXT: v_mov_b32_e32 v1, s0
@@ -3483,18 +3483,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-NEXT: .LBB6_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3513,22 +3513,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132: ; %bb.0:
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
@@ -3547,26 +3547,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-NEXT: .LBB6_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3575,13 +3575,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -3589,15 +3589,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3609,8 +3609,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
@@ -3618,8 +3618,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -3628,59 +3628,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-DPP-NEXT: .LBB6_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3693,36 +3693,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-DPP-NEXT: .LBB6_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -3730,32 +3730,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -3768,38 +3768,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-DPP-NEXT: .LBB6_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -3807,31 +3807,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -3844,38 +3844,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-DPP-NEXT: .LBB6_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -3884,7 +3884,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -3892,16 +3892,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -3923,18 +3923,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-DPP-NEXT: .LBB6_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3953,22 +3953,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -3987,26 +3987,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-DPP-NEXT: .LBB6_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4019,19 +4019,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -4042,15 +4042,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42]
@@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -4107,43 +4107,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4
; GFX7LESS-NEXT: .LBB7_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4152,17 +4152,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4189,12 +4189,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB7_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -4205,54 +4205,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB7_4
; GFX9-NEXT: .LBB7_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
@@ -4261,17 +4261,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4298,12 +4298,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB7_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -4313,56 +4313,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB7_4
; GFX1064-NEXT: .LBB7_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
@@ -4371,17 +4371,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4402,17 +4402,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB7_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -4422,38 +4422,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB7_4
; GFX1032-NEXT: .LBB7_5:
; GFX1032-NEXT: s_endpgm
@@ -4461,11 +4461,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
@@ -4473,15 +4473,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -4511,12 +4511,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB7_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4530,15 +4530,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
@@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB7_4
; GFX1164-NEXT: .LBB7_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4563,7 +4563,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -4572,9 +4572,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -4606,19 +4606,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB7_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4632,16 +4632,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v40
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s45
+; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v2, s44
+; GFX1132-NEXT: v_mov_b32_e32 v2, s52
; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8
@@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB7_4
; GFX1132-NEXT: .LBB7_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -4689,34 +4689,34 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -4725,43 +4725,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value at gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value at gotpcrel32@hi+12
@@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4834,20 +4834,20 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45]
+; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -4856,54 +4856,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX9-DPP-NEXT: .LBB7_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4912,17 +4912,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4975,10 +4975,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
@@ -4989,56 +4989,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1064-DPP-NEXT: .LBB7_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -5047,17 +5047,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5098,15 +5098,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -5116,38 +5116,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1032-DPP-NEXT: .LBB7_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -5155,11 +5155,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -5167,15 +5167,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5239,11 +5239,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
@@ -5261,18 +5261,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1164-DPP-NEXT: .LBB7_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5290,7 +5290,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -5299,9 +5299,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5356,16 +5356,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
@@ -5381,26 +5381,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1132-DPP-NEXT: .LBB7_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start
@@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6683,13 +6683,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -6697,15 +6697,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -6717,8 +6717,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
@@ -6726,8 +6726,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -6736,59 +6736,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-NEXT: .LBB10_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB10_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -6801,36 +6801,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-NEXT: .LBB10_3:
; GFX9-NEXT: s_endpgm
@@ -6838,32 +6838,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
-; GFX1064-NEXT: s_mov_b32 s42, s9
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s50, s9
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-NEXT: v_mov_b32_e32 v1, s0
@@ -6876,38 +6876,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-NEXT: .LBB10_3:
; GFX1064-NEXT: s_endpgm
@@ -6915,31 +6915,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
-; GFX1032-NEXT: s_mov_b32 s42, s9
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s50, s9
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-NEXT: v_mov_b32_e32 v1, s0
@@ -6952,38 +6952,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-NEXT: .LBB10_3:
; GFX1032-NEXT: s_endpgm
@@ -6992,7 +6992,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164: ; %bb.0:
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -7000,16 +7000,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: s_mov_b32 s33, s10
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-NEXT: v_mov_b32_e32 v1, s0
@@ -7031,18 +7031,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-NEXT: .LBB10_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7061,22 +7061,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132: ; %bb.0:
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7095,26 +7095,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-NEXT: .LBB10_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7123,13 +7123,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -7137,15 +7137,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -7157,8 +7157,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0
@@ -7166,8 +7166,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -7176,59 +7176,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-DPP-NEXT: .LBB10_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7241,36 +7241,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-DPP-NEXT: .LBB10_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -7278,32 +7278,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -7316,38 +7316,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -7355,31 +7355,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -7392,38 +7392,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -7432,7 +7432,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -7440,16 +7440,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -7471,18 +7471,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7501,22 +7501,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7535,26 +7535,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7567,19 +7567,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -7590,15 +7590,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42]
@@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -7655,43 +7655,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4
; GFX7LESS-NEXT: .LBB11_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -7700,17 +7700,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7737,12 +7737,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB11_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -7753,54 +7753,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB11_4
; GFX9-NEXT: .LBB11_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -7809,17 +7809,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7846,12 +7846,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB11_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -7861,56 +7861,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB11_4
; GFX1064-NEXT: .LBB11_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -7919,17 +7919,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7950,17 +7950,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB11_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -7970,38 +7970,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB11_4
; GFX1032-NEXT: .LBB11_5:
; GFX1032-NEXT: s_endpgm
@@ -8009,11 +8009,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8021,15 +8021,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -8059,12 +8059,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB11_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -8078,15 +8078,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
@@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB11_4
; GFX1164-NEXT: .LBB11_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8111,7 +8111,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -8120,9 +8120,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -8154,19 +8154,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB11_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -8180,16 +8180,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v40
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s45
+; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v2, s44
+; GFX1132-NEXT: v_mov_b32_e32 v2, s52
; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8
@@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB11_4
; GFX1132-NEXT: .LBB11_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8237,34 +8237,34 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -8273,43 +8273,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8382,20 +8382,20 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45]
+; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -8404,54 +8404,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX9-DPP-NEXT: .LBB11_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8460,17 +8460,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8523,10 +8523,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
@@ -8537,56 +8537,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1064-DPP-NEXT: .LBB11_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8595,17 +8595,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8646,15 +8646,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8664,38 +8664,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1032-DPP-NEXT: .LBB11_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -8703,11 +8703,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8715,15 +8715,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8787,11 +8787,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
@@ -8809,18 +8809,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1164-DPP-NEXT: .LBB11_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8838,7 +8838,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8847,9 +8847,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8904,16 +8904,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
@@ -8929,26 +8929,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42]
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1132-DPP-NEXT: .LBB11_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 68d7dcc60506c..81a16df17c728 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
@@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
@@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3135,13 +3135,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -3149,15 +3149,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3169,8 +3169,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
@@ -3178,8 +3178,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -3188,59 +3188,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-NEXT: .LBB6_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB6_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3253,36 +3253,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-NEXT: .LBB6_3:
; GFX9-NEXT: s_endpgm
@@ -3290,32 +3290,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
-; GFX1064-NEXT: s_mov_b32 s42, s9
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s50, s9
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-NEXT: v_mov_b32_e32 v1, s0
@@ -3328,38 +3328,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-NEXT: .LBB6_3:
; GFX1064-NEXT: s_endpgm
@@ -3367,31 +3367,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
-; GFX1032-NEXT: s_mov_b32 s42, s9
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s50, s9
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-NEXT: v_mov_b32_e32 v1, s0
@@ -3404,38 +3404,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-NEXT: .LBB6_3:
; GFX1032-NEXT: s_endpgm
@@ -3444,7 +3444,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164: ; %bb.0:
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -3452,16 +3452,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: s_mov_b32 s33, s10
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-NEXT: v_mov_b32_e32 v1, s0
@@ -3483,18 +3483,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-NEXT: .LBB6_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3513,22 +3513,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132: ; %bb.0:
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
@@ -3547,26 +3547,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-NEXT: .LBB6_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3575,13 +3575,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -3589,15 +3589,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3609,8 +3609,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
@@ -3618,8 +3618,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -3628,59 +3628,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-DPP-NEXT: .LBB6_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3693,36 +3693,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-DPP-NEXT: .LBB6_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -3730,32 +3730,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -3768,38 +3768,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-DPP-NEXT: .LBB6_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -3807,31 +3807,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -3844,38 +3844,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-DPP-NEXT: .LBB6_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -3884,7 +3884,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -3892,16 +3892,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -3923,18 +3923,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-DPP-NEXT: .LBB6_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3953,22 +3953,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -3987,26 +3987,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-DPP-NEXT: .LBB6_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4019,19 +4019,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -4042,15 +4042,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42]
@@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -4107,43 +4107,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4
; GFX7LESS-NEXT: .LBB7_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4152,17 +4152,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4189,12 +4189,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB7_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -4205,54 +4205,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB7_4
; GFX9-NEXT: .LBB7_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4261,17 +4261,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4298,12 +4298,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB7_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -4313,56 +4313,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB7_4
; GFX1064-NEXT: .LBB7_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4371,17 +4371,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4402,17 +4402,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB7_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -4422,38 +4422,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB7_4
; GFX1032-NEXT: .LBB7_5:
; GFX1032-NEXT: s_endpgm
@@ -4461,11 +4461,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4473,15 +4473,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -4511,12 +4511,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB7_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4530,15 +4530,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
@@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB7_4
; GFX1164-NEXT: .LBB7_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4563,7 +4563,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -4572,9 +4572,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -4606,19 +4606,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB7_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4632,16 +4632,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v40
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s45
+; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v2, s44
+; GFX1132-NEXT: v_mov_b32_e32 v2, s52
; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8
@@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB7_4
; GFX1132-NEXT: .LBB7_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -4689,34 +4689,34 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -4725,43 +4725,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4834,20 +4834,20 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45]
+; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -4856,54 +4856,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX9-DPP-NEXT: .LBB7_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -4912,17 +4912,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4975,10 +4975,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
@@ -4989,56 +4989,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1064-DPP-NEXT: .LBB7_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -5047,17 +5047,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5098,15 +5098,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -5116,38 +5116,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1032-DPP-NEXT: .LBB7_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -5155,11 +5155,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -5167,15 +5167,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5239,11 +5239,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
@@ -5261,18 +5261,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1164-DPP-NEXT: .LBB7_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5290,7 +5290,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -5299,9 +5299,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5356,16 +5356,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
@@ -5381,26 +5381,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1132-DPP-NEXT: .LBB7_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start
@@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6683,13 +6683,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -6697,15 +6697,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -6717,8 +6717,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
@@ -6726,8 +6726,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -6736,59 +6736,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-NEXT: .LBB10_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB10_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -6801,36 +6801,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-NEXT: .LBB10_3:
; GFX9-NEXT: s_endpgm
@@ -6838,32 +6838,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
-; GFX1064-NEXT: s_mov_b32 s42, s9
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s50, s9
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-NEXT: v_mov_b32_e32 v1, s0
@@ -6876,38 +6876,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-NEXT: .LBB10_3:
; GFX1064-NEXT: s_endpgm
@@ -6915,31 +6915,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
-; GFX1032-NEXT: s_mov_b32 s42, s9
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s50, s9
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-NEXT: v_mov_b32_e32 v1, s0
@@ -6952,38 +6952,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-NEXT: .LBB10_3:
; GFX1032-NEXT: s_endpgm
@@ -6992,7 +6992,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164: ; %bb.0:
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -7000,16 +7000,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: s_mov_b32 s33, s10
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-NEXT: v_mov_b32_e32 v1, s0
@@ -7031,18 +7031,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-NEXT: .LBB10_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7061,22 +7061,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132: ; %bb.0:
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7095,26 +7095,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-NEXT: .LBB10_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7123,13 +7123,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0
; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
@@ -7137,15 +7137,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -7157,8 +7157,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[2:3], 4.0
@@ -7166,8 +7166,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -7176,59 +7176,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-DPP-NEXT: .LBB10_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7241,36 +7241,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-DPP-NEXT: .LBB10_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -7278,32 +7278,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -7316,38 +7316,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -7355,31 +7355,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -7392,38 +7392,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -7432,7 +7432,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -7440,16 +7440,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -7471,18 +7471,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7501,22 +7501,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7535,26 +7535,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7567,19 +7567,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -7590,15 +7590,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42]
@@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -7655,43 +7655,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4
; GFX7LESS-NEXT: .LBB11_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -7700,17 +7700,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7737,12 +7737,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB11_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -7753,54 +7753,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB11_4
; GFX9-NEXT: .LBB11_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -7809,17 +7809,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7846,12 +7846,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB11_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -7861,56 +7861,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB11_4
; GFX1064-NEXT: .LBB11_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -7919,17 +7919,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7950,17 +7950,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB11_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -7970,38 +7970,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB11_4
; GFX1032-NEXT: .LBB11_5:
; GFX1032-NEXT: s_endpgm
@@ -8009,11 +8009,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8021,15 +8021,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -8059,12 +8059,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB11_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -8078,15 +8078,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off
@@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB11_4
; GFX1164-NEXT: .LBB11_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8111,7 +8111,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -8120,9 +8120,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -8154,19 +8154,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB11_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -8180,16 +8180,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX1132-NEXT: v_mov_b32_e32 v31, v40
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX1132-NEXT: v_mov_b32_e32 v3, s45
+; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: v_mov_b32_e32 v6, 0
-; GFX1132-NEXT: v_mov_b32_e32 v2, s44
+; GFX1132-NEXT: v_mov_b32_e32 v2, s52
; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off
; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8
@@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB11_4
; GFX1132-NEXT: .LBB11_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8237,34 +8237,34 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -8273,43 +8273,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8382,20 +8382,20 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45]
+; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -8404,54 +8404,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX9-DPP-NEXT: .LBB11_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8460,17 +8460,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8523,10 +8523,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42]
@@ -8537,56 +8537,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1064-DPP-NEXT: .LBB11_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8595,17 +8595,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8646,15 +8646,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8664,38 +8664,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1032-DPP-NEXT: .LBB11_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -8703,11 +8703,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12
@@ -8715,15 +8715,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8787,11 +8787,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
@@ -8809,18 +8809,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1164-DPP-NEXT: .LBB11_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8838,7 +8838,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8847,9 +8847,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8904,16 +8904,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
@@ -8929,26 +8929,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42]
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1132-DPP-NEXT: .LBB11_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 7126680525b87..416ce5a031810 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -425,12 +425,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -448,8 +448,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -497,12 +497,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -518,9 +518,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -564,13 +564,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -586,8 +586,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -631,13 +631,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -653,8 +653,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -821,19 +821,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -846,11 +846,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -859,7 +859,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -871,12 +871,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -892,9 +892,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -953,13 +953,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -975,8 +975,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1030,13 +1030,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -1052,8 +1052,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1757,12 +1757,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1780,8 +1780,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1829,12 +1829,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1850,9 +1850,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1896,13 +1896,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1918,8 +1918,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1963,13 +1963,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1985,8 +1985,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2153,19 +2153,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -2178,11 +2178,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2191,7 +2191,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2203,12 +2203,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2224,9 +2224,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2285,13 +2285,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2307,8 +2307,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2362,13 +2362,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2384,8 +2384,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3089,12 +3089,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -3161,12 +3161,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -3182,9 +3182,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3228,13 +3228,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -3250,8 +3250,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3295,13 +3295,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3485,19 +3485,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -3510,11 +3510,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -3523,7 +3523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3535,12 +3535,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -3556,9 +3556,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3617,13 +3617,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -3639,8 +3639,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3694,13 +3694,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -3716,8 +3716,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3917,12 +3917,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -3940,8 +3940,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -3989,12 +3989,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -4010,9 +4010,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4056,13 +4056,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -4078,8 +4078,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4123,13 +4123,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -4145,8 +4145,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4313,19 +4313,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -4338,11 +4338,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -4351,7 +4351,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -4363,12 +4363,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -4384,9 +4384,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4445,13 +4445,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -4467,8 +4467,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4522,13 +4522,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -4544,8 +4544,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5248,12 +5248,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -5271,8 +5271,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -5320,12 +5320,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5341,9 +5341,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5387,13 +5387,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5409,8 +5409,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5454,16 +5454,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
-; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
-; GFX1032-NEXT: s_mov_b32 s12, s8
-; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s13, s9
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_mov_b32 s12, s8
+; GFX1032-NEXT: s_add_u32 s8, s34, 44
+; GFX1032-NEXT: s_mov_b32 s13, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
; GFX1032-NEXT: s_getpc_b64 s[4:5]
; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
@@ -5476,8 +5476,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5644,19 +5644,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -5669,11 +5669,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -5682,7 +5682,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -5694,12 +5694,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -5715,9 +5715,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5776,13 +5776,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -5798,8 +5798,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5853,13 +5853,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -5875,8 +5875,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6075,14 +6075,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
@@ -6091,16 +6091,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX7LESS-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
; GFX7LESS-NEXT: v_or_b32_e32 v4, v0, v4
@@ -6113,11 +6113,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -6130,64 +6130,64 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-NEXT: .LBB9_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_cbranch_execz .LBB9_3
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s42, s9
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s50, s9
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v3, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -6200,68 +6200,68 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v3, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-NEXT: .LBB9_3:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[10:11], exec
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6278,69 +6278,69 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-NEXT: .LBB9_3:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_mov_b32 s9, exec_lo
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: s_bcnt1_i32_b32 s0, s9
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6357,37 +6357,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-NEXT: .LBB9_3:
; GFX1032-NEXT: s_endpgm
@@ -6398,7 +6398,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_mov_b64 s[10:11], exec
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6407,16 +6407,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-NEXT: ; %bb.1:
; GFX1164-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -6439,18 +6439,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-NEXT: .LBB9_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6470,24 +6470,24 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_mov_b32 s8, exec_lo
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-NEXT: ; %bb.1:
; GFX1132-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -6507,25 +6507,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-NEXT: .LBB9_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6534,14 +6534,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3
@@ -6550,16 +6550,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX7LESS-DPP-NEXT: v_cvt_f64_u32_e32 v[1:2], s2
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v4, v0, v4
@@ -6572,11 +6572,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -6589,64 +6589,64 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-DPP-NEXT: .LBB9_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX9-DPP-NEXT: ; %bb.1:
; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -6659,68 +6659,68 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-DPP-NEXT: .LBB9_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], exec
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6737,69 +6737,69 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-DPP-NEXT: .LBB9_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, s9
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
@@ -6816,37 +6816,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-DPP-NEXT: .LBB9_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -6857,7 +6857,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], exec
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -6866,16 +6866,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11]
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -6898,18 +6898,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -6917,8 +6917,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-DPP-NEXT: .LBB9_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6929,24 +6929,24 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s8, exec_lo
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3
; GFX1132-DPP-NEXT: ; %bb.1:
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, s8
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -6966,25 +6966,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-DPP-NEXT: .LBB9_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6997,19 +6997,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -7020,15 +7020,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -7053,21 +7053,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -7080,44 +7080,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4
; GFX7LESS-NEXT: .LBB10_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7126,17 +7126,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7161,11 +7161,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB10_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -7176,53 +7176,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB10_4
; GFX9-NEXT: .LBB10_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7231,17 +7231,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7266,11 +7266,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB10_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -7280,55 +7280,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_4
; GFX1064-NEXT: .LBB10_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7337,17 +7337,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7366,16 +7366,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB10_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -7385,37 +7385,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB10_4
; GFX1032-NEXT: .LBB10_5:
; GFX1032-NEXT: s_endpgm
@@ -7423,11 +7423,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7435,15 +7435,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -7471,11 +7471,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB10_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -7492,18 +7492,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -7511,8 +7511,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_4
; GFX1164-NEXT: .LBB10_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7521,7 +7521,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -7530,9 +7530,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -7541,7 +7541,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -7561,17 +7561,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB10_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -7586,25 +7586,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB10_4
; GFX1132-NEXT: .LBB10_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7613,22 +7613,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -7639,30 +7639,30 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -7675,44 +7675,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7721,17 +7721,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7778,74 +7778,74 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[44:45]
+; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[52:53]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-DPP-NEXT: .LBB10_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7854,17 +7854,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7909,10 +7909,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -7922,55 +7922,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -7979,17 +7979,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8024,14 +8024,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -8041,37 +8041,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -8079,11 +8079,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -8091,15 +8091,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8153,10 +8153,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -8173,18 +8173,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -8192,8 +8192,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8202,7 +8202,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8211,9 +8211,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -8222,7 +8222,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8264,14 +8264,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -8286,25 +8286,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8835,12 +8835,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -8858,8 +8858,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -8912,12 +8912,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -8933,9 +8933,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8982,13 +8982,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -9004,8 +9004,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9052,13 +9052,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -9074,8 +9074,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9249,19 +9249,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -9274,11 +9274,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -9289,7 +9289,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -9302,12 +9302,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -9323,9 +9323,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9401,13 +9401,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -9423,8 +9423,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9490,13 +9490,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -9512,8 +9512,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10268,12 +10268,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -10291,8 +10291,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -10345,12 +10345,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -10366,9 +10366,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10415,13 +10415,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -10437,8 +10437,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10485,13 +10485,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -10507,8 +10507,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10682,19 +10682,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -10707,11 +10707,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -10722,7 +10722,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -10735,12 +10735,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -10756,9 +10756,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10834,13 +10834,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -10856,8 +10856,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10923,13 +10923,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -10945,8 +10945,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11183,12 +11183,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s38, -1
-; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
-; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
+; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -11206,8 +11206,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -11260,12 +11260,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -11281,9 +11281,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11330,13 +11330,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s38, -1
-; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s50, -1
+; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s48, s48, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -11352,8 +11352,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11400,13 +11400,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s38, -1
-; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s50, -1
+; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s48, s48, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -11422,8 +11422,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11597,19 +11597,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -11622,11 +11622,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -11637,7 +11637,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -11650,12 +11650,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s38, -1
-; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
-; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s50, -1
+; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -11671,9 +11671,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11749,13 +11749,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -11771,8 +11771,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11838,13 +11838,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -11860,8 +11860,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -12097,13 +12097,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX7LESS-NEXT: s_mov_b32 s1, 0x43300000
@@ -12117,15 +12117,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-NEXT: ; %bb.1:
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -12137,11 +12137,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -12154,40 +12154,40 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-NEXT: .LBB16_3:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX9-NEXT: v_mov_b32_e32 v4, 0xc3300000
; GFX9-NEXT: s_mov_b32 s1, 0x43300000
@@ -12200,20 +12200,20 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-NEXT: s_cbranch_execz .LBB16_3
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -12226,48 +12226,48 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-NEXT: .LBB16_3:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1064-NEXT: s_mov_b32 s1, 0x43300000
; GFX1064-NEXT: s_movk_i32 s32, 0x800
@@ -12279,19 +12279,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-NEXT: ; %bb.1:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
-; GFX1064-NEXT: s_mov_b32 s42, s9
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_mov_b32 s50, s9
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-NEXT: v_mov_b32_e32 v1, s0
@@ -12304,53 +12304,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-NEXT: .LBB16_3:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12359,18 +12359,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-NEXT: ; %bb.1:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
-; GFX1032-NEXT: s_mov_b32 s42, s9
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_mov_b32 s50, s9
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-NEXT: v_mov_b32_e32 v1, s0
@@ -12383,44 +12383,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-NEXT: .LBB16_3:
; GFX1032-NEXT: s_endpgm
;
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000
@@ -12441,16 +12441,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-NEXT: ; %bb.1:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-NEXT: s_mov_b32 s33, s10
-; GFX1164-NEXT: s_mov_b32 s42, s9
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s50, s9
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-NEXT: v_mov_b32_e32 v1, s0
@@ -12471,18 +12471,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -12490,8 +12490,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-NEXT: .LBB16_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12499,12 +12499,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
;
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_clause 0x1
; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12519,15 +12519,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-NEXT: ; %bb.1:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-NEXT: s_mov_b32 s33, s15
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
@@ -12545,25 +12545,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-NEXT: .LBB16_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12572,13 +12572,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0
; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX7LESS-DPP-NEXT: s_mov_b32 s1, 0x43300000
@@ -12592,15 +12592,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX7LESS-DPP-NEXT: ; %bb.1:
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -12612,11 +12612,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -12629,40 +12629,40 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-DPP-NEXT: .LBB16_3:
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s66, -1
+; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000
; GFX9-DPP-NEXT: s_mov_b32 s1, 0x43300000
@@ -12675,20 +12675,20 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -12701,48 +12701,48 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-DPP-NEXT: .LBB16_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1064-DPP-NEXT: s_mov_b32 s1, 0x43300000
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
@@ -12754,19 +12754,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -12779,53 +12779,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-DPP-NEXT: .LBB16_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12834,18 +12834,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -12858,44 +12858,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-DPP-NEXT: .LBB16_3:
; GFX1032-DPP-NEXT: s_endpgm
;
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, exec
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
@@ -12916,16 +12916,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0
@@ -12946,18 +12946,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -12965,8 +12965,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-DPP-NEXT: .LBB16_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12974,12 +12974,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
;
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_clause 0x1
; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12994,15 +12994,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB16_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0
+; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -13020,25 +13020,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-DPP-NEXT: .LBB16_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13051,19 +13051,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
@@ -13074,15 +13074,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -13107,21 +13107,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -13134,44 +13134,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4
; GFX7LESS-NEXT: .LBB17_5:
; GFX7LESS-NEXT: s_endpgm
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b32 s43, s8
+; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_add_u32 s8, s36, 44
-; GFX9-NEXT: s_mov_b32 s42, s9
+; GFX9-NEXT: s_mov_b32 s50, s9
; GFX9-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13180,17 +13180,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13215,11 +13215,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-NEXT: s_cbranch_execz .LBB17_5
; GFX9-NEXT: ; %bb.3:
-; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[46:47], 0
+; GFX9-NEXT: s_mov_b64 s[62:63], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -13230,53 +13230,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s43
-; GFX9-NEXT: s_mov_b32 s13, s42
+; GFX9-NEXT: s_mov_b32 s12, s51
+; GFX9-NEXT: s_mov_b32 s13, s50
; GFX9-NEXT: s_mov_b32 s14, s33
; GFX9-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_mov_b32_e32 v2, s44
-; GFX9-NEXT: v_mov_b32_e32 v3, s45
+; GFX9-NEXT: v_mov_b32_e32 v2, s52
+; GFX9-NEXT: v_mov_b32_e32 v3, s53
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 8
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX9-NEXT: s_cbranch_execnz .LBB17_4
; GFX9-NEXT: .LBB17_5:
; GFX9-NEXT: s_endpgm
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s66, -1
+; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s64, s64, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-NEXT: s_mov_b32 s43, s8
+; GFX1064-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-NEXT: s_mov_b32 s42, s9
+; GFX1064-NEXT: s_mov_b32 s50, s9
; GFX1064-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13285,17 +13285,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
+; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-NEXT: s_movk_i32 s32, 0x800
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13320,11 +13320,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_cbranch_execz .LBB17_5
; GFX1064-NEXT: ; %bb.3:
-; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -13334,55 +13334,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: s_getpc_b64 s[0:1]
; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-NEXT: s_mov_b32 s12, s43
-; GFX1064-NEXT: s_mov_b32 s13, s42
+; GFX1064-NEXT: s_mov_b32 s12, s51
+; GFX1064-NEXT: s_mov_b32 s13, s50
; GFX1064-NEXT: s_mov_b32 s14, s33
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-NEXT: s_cbranch_execnz .LBB17_4
; GFX1064-NEXT: .LBB17_5:
; GFX1064-NEXT: s_endpgm
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s66, -1
+; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-NEXT: s_mov_b32 s43, s8
+; GFX1032-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-NEXT: s_mov_b32 s42, s9
+; GFX1032-NEXT: s_mov_b32 s50, s9
; GFX1032-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13391,17 +13391,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
+; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13420,16 +13420,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s46, 0
+; GFX1032-NEXT: s_mov_b32 s62, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_cbranch_execz .LBB17_5
; GFX1032-NEXT: ; %bb.3:
-; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -13439,37 +13439,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: s_getpc_b64 s[0:1]
; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-NEXT: s_mov_b32 s12, s43
-; GFX1032-NEXT: s_mov_b32 s13, s42
+; GFX1032-NEXT: s_mov_b32 s12, s51
+; GFX1032-NEXT: s_mov_b32 s13, s50
; GFX1032-NEXT: s_mov_b32 s14, s33
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: s_clause 0x1
-; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-NEXT: s_cbranch_execnz .LBB17_4
; GFX1032-NEXT: .LBB17_5:
; GFX1032-NEXT: s_endpgm
@@ -13477,11 +13477,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1164: ; %bb.0:
; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-NEXT: s_mov_b32 s43, s8
+; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-NEXT: s_mov_b32 s42, s9
+; GFX1164-NEXT: s_mov_b32 s50, s9
; GFX1164-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-NEXT: s_getpc_b64 s[0:1]
; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13489,15 +13489,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-NEXT: s_mov_b32 s33, s10
; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -13525,11 +13525,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX1164-NEXT: s_cbranch_execz .LBB17_5
; GFX1164-NEXT: ; %bb.3:
-; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-NEXT: .p2align 6
; GFX1164-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13546,18 +13546,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-NEXT: s_mov_b32 s12, s43
-; GFX1164-NEXT: s_mov_b32 s13, s42
+; GFX1164-NEXT: s_mov_b32 s12, s51
+; GFX1164-NEXT: s_mov_b32 s13, s50
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -13565,8 +13565,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-NEXT: s_cbranch_execnz .LBB17_4
; GFX1164-NEXT: .LBB17_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13575,7 +13575,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1132: ; %bb.0:
; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-NEXT: s_add_u32 s8, s34, 44
; GFX1132-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-NEXT: s_getpc_b64 s[0:1]
@@ -13584,9 +13584,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-NEXT: s_mov_b32 s42, s14
-; GFX1132-NEXT: s_mov_b32 s43, s13
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-NEXT: s_mov_b32 s50, s14
+; GFX1132-NEXT: s_mov_b32 s51, s13
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s13
@@ -13595,7 +13595,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -13615,17 +13615,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s46, 0
+; GFX1132-NEXT: s_mov_b32 s62, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX1132-NEXT: s_cbranch_execz .LBB17_5
; GFX1132-NEXT: ; %bb.3:
-; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-NEXT: v_mov_b32_e32 v0, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-NEXT: .p2align 6
; GFX1132-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13640,25 +13640,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-NEXT: s_mov_b32 s12, s43
-; GFX1132-NEXT: s_mov_b32 s13, s42
+; GFX1132-NEXT: s_mov_b32 s12, s51
+; GFX1132-NEXT: s_mov_b32 s13, s50
; GFX1132-NEXT: s_mov_b32 s14, s33
; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-NEXT: s_cbranch_execnz .LBB17_4
; GFX1132-NEXT: .LBB17_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13667,22 +13667,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
-; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -13693,30 +13693,30 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -13729,44 +13729,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43
-; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42
+; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
+; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s54, -1
-; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11
-; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0
+; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s82, -1
+; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
+; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b32 s43, s8
+; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX9-DPP-NEXT: s_mov_b32 s42, s9
+; GFX9-DPP-NEXT: s_mov_b32 s50, s9
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13775,17 +13775,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13832,74 +13832,74 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
-; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63
-; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63
+; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63
; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24
-; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0
+; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
-; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[44:45]
+; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[52:53]
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX9-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-DPP-NEXT: s_mov_b32 s12, s43
-; GFX9-DPP-NEXT: s_mov_b32 s13, s42
+; GFX9-DPP-NEXT: s_mov_b32 s12, s51
+; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49]
+; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX9-DPP-NEXT: .LBB17_3:
; GFX9-DPP-NEXT: s_endpgm
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1064-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1064-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1064-DPP-NEXT: s_mov_b32 s50, s9
; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -13908,17 +13908,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13963,10 +13963,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1064-DPP-NEXT: ; %bb.1:
-; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
+; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -13976,55 +13976,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1064-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1064-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
; GFX1064-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_clause 0x1
-; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47]
+; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1064-DPP-NEXT: .LBB17_3:
; GFX1064-DPP-NEXT: s_endpgm
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s66, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
-; GFX1032-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1032-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1032-DPP-NEXT: s_mov_b32 s50, s9
; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -14033,17 +14033,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -14078,14 +14078,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1032-DPP-NEXT: ; %bb.1:
-; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24
+; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45]
+; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1032-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -14095,37 +14095,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4
-; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0
+; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1032-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1032-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
; GFX1032-DPP-NEXT: s_mov_b32 s14, s33
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
-; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12
-; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8
-; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
+; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
+; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_clause 0x1
-; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
-; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4
+; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0
+; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46
+; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1032-DPP-NEXT: .LBB17_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -14133,11 +14133,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1164-DPP: ; %bb.0:
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b32 s43, s8
+; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44
-; GFX1164-DPP-NEXT: s_mov_b32 s42, s9
+; GFX1164-DPP-NEXT: s_mov_b32 s50, s9
; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4
; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12
@@ -14145,15 +14145,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1164-DPP-NEXT: s_mov_b32 s33, s10
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -14207,10 +14207,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1164-DPP-NEXT: ; %bb.1:
-; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0
+; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
+; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1164-DPP-NEXT: .p2align 6
; GFX1164-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
@@ -14227,18 +14227,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40
; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52
; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1164-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1164-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45
+; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -14246,8 +14246,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47]
+; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1164-DPP-NEXT: .LBB17_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -14256,7 +14256,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX1132-DPP: ; %bb.0:
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1]
+; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0
; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -14265,9 +14265,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1132-DPP-NEXT: s_mov_b32 s42, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s43, s13
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GFX1132-DPP-NEXT: s_mov_b32 s50, s14
+; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
@@ -14276,7 +14276,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -14318,14 +14318,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s46, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1132-DPP-NEXT: ; %bb.1:
-; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24
+; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45]
+; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1
; GFX1132-DPP-NEXT: .p2align 6
; GFX1132-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
@@ -14340,25 +14340,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
-; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GFX1132-DPP-NEXT: s_mov_b32 s12, s43
-; GFX1132-DPP-NEXT: s_mov_b32 s13, s42
+; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
+; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
; GFX1132-DPP-NEXT: s_mov_b32 s14, s33
; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8
-; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0
+; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46
+; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1132-DPP-NEXT: .LBB17_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
index dde84af57ed25..8ae89ad96a16b 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -31,102 +31,105 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr34_sgpr35 = COPY $sgpr8_sgpr9
; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr15
- ; CHECK-NEXT: renamable $sgpr42 = COPY $sgpr14
+ ; CHECK-NEXT: renamable $sgpr50 = COPY $sgpr14
; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: renamable $sgpr40_sgpr41 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: renamable $sgpr66_sgpr67 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
- ; CHECK-NEXT: renamable $sgpr44 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr45 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr46 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr47 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr48 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr49 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr50 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr51 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr52 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr53 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr54 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr55 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr58 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr59 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr60 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr61 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr62 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr63 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: renamable $sgpr62_sgpr63 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; CHECK-NEXT: renamable $sgpr64 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr68_sgpr69 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr65 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr66 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr67 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr68 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr69 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr70 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr71 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr72 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr73 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr74 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr75 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr76 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr77 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr78 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr79 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr80 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr81 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr82 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr83 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr84 = S_MOV_B32 0
+ ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53 = IMPLICIT_DEF
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr40_sgpr41
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr38_sgpr39
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr48_sgpr49
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr46_sgpr47
; CHECK-NEXT: $sgpr8_sgpr9 = COPY killed renamable $sgpr34_sgpr35
; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37
- ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr42
+ ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr50
; CHECK-NEXT: $sgpr13 = COPY killed renamable $sgpr33
- ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $exec, implicit-def $exec
; CHECK-NEXT: dead renamable $sgpr6_sgpr7 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr66_sgpr67:0x000000000000000F, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr66_sgpr67:0x000000000000000F, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
- ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr67, 1, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr63, 1, implicit-def dead $scc
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr66_sgpr67:0x000000000000000F, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95:0x0000000000000003
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr44
- ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr44
- ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr64
+ ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr66_sgpr67:0x0000000000000003, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr62_sgpr63:0x0000000000000003
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
- ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr66, 1, implicit-def dead $scc
- ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
+ ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr62, 1, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
bb.0:
@@ -211,7 +214,6 @@ body: |
%15.sub19:sgpr_1024 = COPY %7.sub0
%15.sub20:sgpr_1024 = COPY %7.sub0
%15.sub21:sgpr_1024 = COPY %7.sub0
- ; Spill code ends up getting inserted here, and we end up with many unspillable sgpr1024 ranges
%16:vreg_1024 = COPY %15, implicit $exec
$exec = S_XOR_B64_term $exec, %14, implicit-def $scc
S_CBRANCH_EXECZ %bb.5, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 5dff660912e40..1a8557d25fb92 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -10,59 +10,51 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
-; CHECK-NEXT: v_writelane_b32 v5, s30, 0
-; CHECK-NEXT: v_writelane_b32 v5, s31, 1
-; CHECK-NEXT: v_writelane_b32 v5, s36, 2
-; CHECK-NEXT: v_writelane_b32 v5, s37, 3
-; CHECK-NEXT: v_writelane_b32 v5, s38, 4
-; CHECK-NEXT: v_writelane_b32 v5, s39, 5
-; CHECK-NEXT: v_writelane_b32 v5, s40, 6
-; CHECK-NEXT: v_writelane_b32 v5, s41, 7
-; CHECK-NEXT: v_writelane_b32 v5, s42, 8
-; CHECK-NEXT: v_writelane_b32 v5, s43, 9
-; CHECK-NEXT: v_writelane_b32 v5, s44, 10
-; CHECK-NEXT: v_writelane_b32 v5, s45, 11
-; CHECK-NEXT: v_writelane_b32 v5, s46, 12
-; CHECK-NEXT: v_writelane_b32 v5, s47, 13
-; CHECK-NEXT: v_writelane_b32 v5, s48, 14
-; CHECK-NEXT: v_writelane_b32 v5, s49, 15
+; CHECK-NEXT: v_writelane_b32 v5, s36, 0
+; CHECK-NEXT: v_writelane_b32 v5, s37, 1
+; CHECK-NEXT: v_writelane_b32 v5, s46, 2
+; CHECK-NEXT: v_writelane_b32 v5, s47, 3
+; CHECK-NEXT: v_writelane_b32 v5, s48, 4
+; CHECK-NEXT: v_writelane_b32 v5, s49, 5
+; CHECK-NEXT: v_writelane_b32 v5, s50, 6
+; CHECK-NEXT: v_writelane_b32 v5, s51, 7
; CHECK-NEXT: s_getpc_b64 s[24:25]
-; CHECK-NEXT: v_writelane_b32 v5, s50, 16
+; CHECK-NEXT: v_writelane_b32 v5, s52, 8
; CHECK-NEXT: s_movk_i32 s4, 0xf0
; CHECK-NEXT: s_mov_b32 s5, s24
-; CHECK-NEXT: v_writelane_b32 v5, s51, 17
-; CHECK-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0
+; CHECK-NEXT: v_writelane_b32 v5, s53, 9
+; CHECK-NEXT: s_load_dwordx16 s[44:59], s[4:5], 0x0
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: s_load_dwordx4 s[28:31], s[4:5], 0x0
+; CHECK-NEXT: s_load_dwordx4 s[40:43], s[4:5], 0x0
; CHECK-NEXT: s_movk_i32 s20, 0x130
; CHECK-NEXT: s_mov_b32 s21, s24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_writelane_b32 v7, s36, 0
-; CHECK-NEXT: v_writelane_b32 v7, s37, 1
-; CHECK-NEXT: v_writelane_b32 v7, s38, 2
-; CHECK-NEXT: v_writelane_b32 v7, s39, 3
-; CHECK-NEXT: v_writelane_b32 v7, s40, 4
-; CHECK-NEXT: v_writelane_b32 v7, s41, 5
-; CHECK-NEXT: v_writelane_b32 v7, s42, 6
-; CHECK-NEXT: v_writelane_b32 v7, s43, 7
-; CHECK-NEXT: v_writelane_b32 v7, s44, 8
-; CHECK-NEXT: v_writelane_b32 v7, s45, 9
-; CHECK-NEXT: v_writelane_b32 v7, s46, 10
+; CHECK-NEXT: v_writelane_b32 v7, s44, 0
+; CHECK-NEXT: v_writelane_b32 v7, s45, 1
+; CHECK-NEXT: v_writelane_b32 v7, s46, 2
+; CHECK-NEXT: v_writelane_b32 v7, s47, 3
+; CHECK-NEXT: v_writelane_b32 v7, s48, 4
+; CHECK-NEXT: v_writelane_b32 v7, s49, 5
+; CHECK-NEXT: v_writelane_b32 v7, s50, 6
+; CHECK-NEXT: v_writelane_b32 v7, s51, 7
+; CHECK-NEXT: v_writelane_b32 v7, s52, 8
+; CHECK-NEXT: v_writelane_b32 v7, s53, 9
+; CHECK-NEXT: v_writelane_b32 v7, s54, 10
; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
-; CHECK-NEXT: v_writelane_b32 v7, s47, 11
-; CHECK-NEXT: v_writelane_b32 v7, s48, 12
+; CHECK-NEXT: v_writelane_b32 v7, s55, 11
+; CHECK-NEXT: v_writelane_b32 v7, s56, 12
; CHECK-NEXT: s_mov_b32 s20, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: v_writelane_b32 v7, s49, 13
-; CHECK-NEXT: v_mov_b32_e32 v2, s28
+; CHECK-NEXT: v_writelane_b32 v7, s57, 13
+; CHECK-NEXT: v_mov_b32_e32 v2, s40
; CHECK-NEXT: v_mov_b32_e32 v3, v1
; CHECK-NEXT: s_mov_b32 s21, s20
; CHECK-NEXT: s_mov_b32 s22, s20
; CHECK-NEXT: s_mov_b32 s23, s20
-; CHECK-NEXT: v_writelane_b32 v7, s50, 14
-; CHECK-NEXT: v_writelane_b32 v7, s51, 15
-; CHECK-NEXT: image_sample_lz v3, v[2:3], s[44:51], s[20:23] dmask:0x1
+; CHECK-NEXT: v_writelane_b32 v7, s58, 14
+; CHECK-NEXT: v_writelane_b32 v7, s59, 15
+; CHECK-NEXT: image_sample_lz v3, v[2:3], s[52:59], s[20:23] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v2, v1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v7, s4, 16
@@ -78,44 +70,34 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v7, s13, 25
; CHECK-NEXT: v_writelane_b32 v7, s14, 26
; CHECK-NEXT: v_writelane_b32 v7, s15, 27
-; CHECK-NEXT: v_writelane_b32 v5, s52, 18
; CHECK-NEXT: v_writelane_b32 v7, s16, 28
-; CHECK-NEXT: v_writelane_b32 v5, s53, 19
; CHECK-NEXT: v_writelane_b32 v7, s17, 29
-; CHECK-NEXT: v_writelane_b32 v5, s54, 20
; CHECK-NEXT: v_writelane_b32 v7, s18, 30
; CHECK-NEXT: s_mov_b32 s26, 48
; CHECK-NEXT: s_mov_b32 s27, s24
-; CHECK-NEXT: v_writelane_b32 v5, s55, 21
; CHECK-NEXT: v_writelane_b32 v7, s19, 31
; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0
-; CHECK-NEXT: v_writelane_b32 v5, s56, 22
-; CHECK-NEXT: v_writelane_b32 v5, s57, 23
-; CHECK-NEXT: v_writelane_b32 v5, s58, 24
-; CHECK-NEXT: v_writelane_b32 v5, s59, 25
-; CHECK-NEXT: v_writelane_b32 v5, s60, 26
+; CHECK-NEXT: v_writelane_b32 v5, s62, 10
+; CHECK-NEXT: v_writelane_b32 v5, s63, 11
+; CHECK-NEXT: v_writelane_b32 v5, s64, 12
+; CHECK-NEXT: v_writelane_b32 v5, s65, 13
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v7, s4, 32
-; CHECK-NEXT: v_writelane_b32 v5, s61, 27
; CHECK-NEXT: v_writelane_b32 v7, s5, 33
-; CHECK-NEXT: v_writelane_b32 v5, s62, 28
; CHECK-NEXT: v_writelane_b32 v7, s6, 34
-; CHECK-NEXT: v_writelane_b32 v5, s63, 29
; CHECK-NEXT: v_writelane_b32 v7, s7, 35
-; CHECK-NEXT: v_writelane_b32 v5, s64, 30
; CHECK-NEXT: v_writelane_b32 v7, s8, 36
-; CHECK-NEXT: v_writelane_b32 v5, s65, 31
; CHECK-NEXT: v_writelane_b32 v7, s9, 37
-; CHECK-NEXT: v_writelane_b32 v5, s66, 32
+; CHECK-NEXT: v_writelane_b32 v5, s66, 14
; CHECK-NEXT: s_movk_i32 s28, 0x1f0
-; CHECK-NEXT: s_movk_i32 s30, 0x2f0
+; CHECK-NEXT: s_movk_i32 s70, 0x2f0
; CHECK-NEXT: s_mov_b32 s29, s24
-; CHECK-NEXT: s_mov_b32 s31, s24
+; CHECK-NEXT: s_mov_b32 s71, s24
; CHECK-NEXT: v_writelane_b32 v7, s10, 38
-; CHECK-NEXT: v_writelane_b32 v5, s67, 33
+; CHECK-NEXT: v_writelane_b32 v5, s67, 15
; CHECK-NEXT: v_writelane_b32 v7, s11, 39
; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0
-; CHECK-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
+; CHECK-NEXT: s_load_dwordx16 s[4:19], s[70:71], 0x0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1
@@ -334,40 +316,22 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock
; CHECK-NEXT: s_or_b64 exec, exec, s[20:21]
-; CHECK-NEXT: v_readlane_b32 s67, v5, 33
-; CHECK-NEXT: v_readlane_b32 s66, v5, 32
-; CHECK-NEXT: v_readlane_b32 s65, v5, 31
-; CHECK-NEXT: v_readlane_b32 s64, v5, 30
-; CHECK-NEXT: v_readlane_b32 s63, v5, 29
-; CHECK-NEXT: v_readlane_b32 s62, v5, 28
-; CHECK-NEXT: v_readlane_b32 s61, v5, 27
-; CHECK-NEXT: v_readlane_b32 s60, v5, 26
-; CHECK-NEXT: v_readlane_b32 s59, v5, 25
-; CHECK-NEXT: v_readlane_b32 s58, v5, 24
-; CHECK-NEXT: v_readlane_b32 s57, v5, 23
-; CHECK-NEXT: v_readlane_b32 s56, v5, 22
-; CHECK-NEXT: v_readlane_b32 s55, v5, 21
-; CHECK-NEXT: v_readlane_b32 s54, v5, 20
-; CHECK-NEXT: v_readlane_b32 s53, v5, 19
-; CHECK-NEXT: v_readlane_b32 s52, v5, 18
-; CHECK-NEXT: v_readlane_b32 s51, v5, 17
-; CHECK-NEXT: v_readlane_b32 s50, v5, 16
-; CHECK-NEXT: v_readlane_b32 s49, v5, 15
-; CHECK-NEXT: v_readlane_b32 s48, v5, 14
-; CHECK-NEXT: v_readlane_b32 s47, v5, 13
-; CHECK-NEXT: v_readlane_b32 s46, v5, 12
-; CHECK-NEXT: v_readlane_b32 s45, v5, 11
-; CHECK-NEXT: v_readlane_b32 s44, v5, 10
-; CHECK-NEXT: v_readlane_b32 s43, v5, 9
-; CHECK-NEXT: v_readlane_b32 s42, v5, 8
-; CHECK-NEXT: v_readlane_b32 s41, v5, 7
-; CHECK-NEXT: v_readlane_b32 s40, v5, 6
-; CHECK-NEXT: v_readlane_b32 s39, v5, 5
-; CHECK-NEXT: v_readlane_b32 s38, v5, 4
-; CHECK-NEXT: v_readlane_b32 s37, v5, 3
-; CHECK-NEXT: v_readlane_b32 s36, v5, 2
-; CHECK-NEXT: v_readlane_b32 s31, v5, 1
-; CHECK-NEXT: v_readlane_b32 s30, v5, 0
+; CHECK-NEXT: v_readlane_b32 s67, v5, 15
+; CHECK-NEXT: v_readlane_b32 s66, v5, 14
+; CHECK-NEXT: v_readlane_b32 s65, v5, 13
+; CHECK-NEXT: v_readlane_b32 s64, v5, 12
+; CHECK-NEXT: v_readlane_b32 s63, v5, 11
+; CHECK-NEXT: v_readlane_b32 s62, v5, 10
+; CHECK-NEXT: v_readlane_b32 s53, v5, 9
+; CHECK-NEXT: v_readlane_b32 s52, v5, 8
+; CHECK-NEXT: v_readlane_b32 s51, v5, 7
+; CHECK-NEXT: v_readlane_b32 s50, v5, 6
+; CHECK-NEXT: v_readlane_b32 s49, v5, 5
+; CHECK-NEXT: v_readlane_b32 s48, v5, 4
+; CHECK-NEXT: v_readlane_b32 s47, v5, 3
+; CHECK-NEXT: v_readlane_b32 s46, v5, 2
+; CHECK-NEXT: v_readlane_b32 s37, v5, 1
+; CHECK-NEXT: v_readlane_b32 s36, v5, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 55da485b91f67..8487e195de8e2 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -134,59 +134,59 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: s_mov_b32 s42, s15
-; GCN-NEXT: s_mov_b32 s43, s14
-; GCN-NEXT: s_mov_b32 s44, s13
-; GCN-NEXT: s_mov_b32 s45, s12
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s64, 16
+; GCN-NEXT: v_writelane_b32 v40, s65, 17
+; GCN-NEXT: s_mov_b32 s50, s15
+; GCN-NEXT: s_mov_b32 s51, s14
+; GCN-NEXT: s_mov_b32 s52, s13
+; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT: s_mov_b64 s[46:47], exec
+; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GCN-NEXT: s_mov_b64 s[62:63], exec
; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s45
-; GCN-NEXT: s_mov_b32 s13, s44
-; GCN-NEXT: s_mov_b32 s14, s43
-; GCN-NEXT: s_mov_b32 s15, s42
+; GCN-NEXT: s_mov_b32 s12, s53
+; GCN-NEXT: s_mov_b32 s13, s52
+; GCN-NEXT: s_mov_b32 s14, s51
+; GCN-NEXT: s_mov_b32 s15, s50
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
-; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GCN-NEXT: s_xor_b64 exec, exec, s[64:65]
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: s_mov_b64 exec, s[62:63]
+; GCN-NEXT: v_readlane_b32 s65, v40, 17
+; GCN-NEXT: v_readlane_b32 s64, v40, 16
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -218,59 +218,59 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: s_mov_b32 s42, s15
-; GISEL-NEXT: s_mov_b32 s43, s14
-; GISEL-NEXT: s_mov_b32 s44, s13
-; GISEL-NEXT: s_mov_b32 s45, s12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s64, 16
+; GISEL-NEXT: v_writelane_b32 v40, s65, 17
+; GISEL-NEXT: s_mov_b32 s50, s15
+; GISEL-NEXT: s_mov_b32 s51, s14
+; GISEL-NEXT: s_mov_b32 s52, s13
+; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT: s_mov_b64 s[46:47], exec
+; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GISEL-NEXT: s_mov_b64 s[62:63], exec
; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s45
-; GISEL-NEXT: s_mov_b32 s13, s44
-; GISEL-NEXT: s_mov_b32 s14, s43
-; GISEL-NEXT: s_mov_b32 s15, s42
+; GISEL-NEXT: s_mov_b32 s12, s53
+; GISEL-NEXT: s_mov_b32 s13, s52
+; GISEL-NEXT: s_mov_b32 s14, s51
+; GISEL-NEXT: s_mov_b32 s15, s50
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
-; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65]
; GISEL-NEXT: s_cbranch_execnz .LBB2_1
; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: s_mov_b64 exec, s[62:63]
+; GISEL-NEXT: v_readlane_b32 s65, v40, 17
+; GISEL-NEXT: v_readlane_b32 s64, v40, 16
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -306,62 +306,62 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: s_mov_b32 s42, s15
-; GCN-NEXT: s_mov_b32 s43, s14
-; GCN-NEXT: s_mov_b32 s44, s13
-; GCN-NEXT: s_mov_b32 s45, s12
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s64, 16
+; GCN-NEXT: v_writelane_b32 v40, s65, 17
+; GCN-NEXT: s_mov_b32 s50, s15
+; GCN-NEXT: s_mov_b32 s51, s14
+; GCN-NEXT: s_mov_b32 s52, s13
+; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT: s_mov_b64 s[46:47], exec
+; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GCN-NEXT: s_mov_b64 s[62:63], exec
; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s45
-; GCN-NEXT: s_mov_b32 s13, s44
-; GCN-NEXT: s_mov_b32 s14, s43
-; GCN-NEXT: s_mov_b32 s15, s42
+; GCN-NEXT: s_mov_b32 s12, s53
+; GCN-NEXT: s_mov_b32 s13, s52
+; GCN-NEXT: s_mov_b32 s14, s51
+; GCN-NEXT: s_mov_b32 s15, s50
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: ; implicit-def: $vgpr2
-; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GCN-NEXT: s_xor_b64 exec, exec, s[64:65]
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: s_mov_b64 exec, s[62:63]
+; GCN-NEXT: v_readlane_b32 s65, v40, 17
+; GCN-NEXT: v_readlane_b32 s64, v40, 16
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -393,60 +393,60 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: s_mov_b32 s42, s15
-; GISEL-NEXT: s_mov_b32 s43, s14
-; GISEL-NEXT: s_mov_b32 s44, s13
-; GISEL-NEXT: s_mov_b32 s45, s12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s64, 16
+; GISEL-NEXT: v_writelane_b32 v40, s65, 17
+; GISEL-NEXT: s_mov_b32 s50, s15
+; GISEL-NEXT: s_mov_b32 s51, s14
+; GISEL-NEXT: s_mov_b32 s52, s13
+; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT: s_mov_b64 s[46:47], exec
+; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GISEL-NEXT: s_mov_b64 s[62:63], exec
; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s45
-; GISEL-NEXT: s_mov_b32 s13, s44
-; GISEL-NEXT: s_mov_b32 s14, s43
-; GISEL-NEXT: s_mov_b32 s15, s42
+; GISEL-NEXT: s_mov_b32 s12, s53
+; GISEL-NEXT: s_mov_b32 s13, s52
+; GISEL-NEXT: s_mov_b32 s14, s51
+; GISEL-NEXT: s_mov_b32 s15, s50
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
-; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65]
; GISEL-NEXT: s_cbranch_execnz .LBB3_1
; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: s_mov_b64 exec, s[62:63]
+; GISEL-NEXT: v_readlane_b32 s65, v40, 17
+; GISEL-NEXT: v_readlane_b32 s64, v40, 16
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -482,61 +482,61 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: s_mov_b32 s42, s15
-; GCN-NEXT: s_mov_b32 s43, s14
-; GCN-NEXT: s_mov_b32 s44, s13
-; GCN-NEXT: s_mov_b32 s45, s12
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s64, 16
+; GCN-NEXT: v_writelane_b32 v40, s65, 17
+; GCN-NEXT: s_mov_b32 s50, s15
+; GCN-NEXT: s_mov_b32 s51, s14
+; GCN-NEXT: s_mov_b32 s52, s13
+; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT: s_mov_b64 s[46:47], exec
+; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GCN-NEXT: s_mov_b64 s[62:63], exec
; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s45
-; GCN-NEXT: s_mov_b32 s13, s44
-; GCN-NEXT: s_mov_b32 s14, s43
-; GCN-NEXT: s_mov_b32 s15, s42
+; GCN-NEXT: s_mov_b32 s12, s53
+; GCN-NEXT: s_mov_b32 s13, s52
+; GCN-NEXT: s_mov_b32 s14, s51
+; GCN-NEXT: s_mov_b32 s15, s50
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
-; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GCN-NEXT: s_xor_b64 exec, exec, s[64:65]
; GCN-NEXT: s_cbranch_execnz .LBB4_1
; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[46:47]
+; GCN-NEXT: s_mov_b64 exec, s[62:63]
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s65, v40, 17
+; GCN-NEXT: v_readlane_b32 s64, v40, 16
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -568,61 +568,61 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: s_mov_b32 s42, s15
-; GISEL-NEXT: s_mov_b32 s43, s14
-; GISEL-NEXT: s_mov_b32 s44, s13
-; GISEL-NEXT: s_mov_b32 s45, s12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s64, 16
+; GISEL-NEXT: v_writelane_b32 v40, s65, 17
+; GISEL-NEXT: s_mov_b32 s50, s15
+; GISEL-NEXT: s_mov_b32 s51, s14
+; GISEL-NEXT: s_mov_b32 s52, s13
+; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT: s_mov_b64 s[46:47], exec
+; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GISEL-NEXT: s_mov_b64 s[62:63], exec
; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s45
-; GISEL-NEXT: s_mov_b32 s13, s44
-; GISEL-NEXT: s_mov_b32 s14, s43
-; GISEL-NEXT: s_mov_b32 s15, s42
+; GISEL-NEXT: s_mov_b32 s12, s53
+; GISEL-NEXT: s_mov_b32 s13, s52
+; GISEL-NEXT: s_mov_b32 s14, s51
+; GISEL-NEXT: s_mov_b32 s15, s50
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: v_mov_b32_e32 v1, v0
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
-; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65]
; GISEL-NEXT: s_cbranch_execnz .LBB4_1
; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[46:47]
+; GISEL-NEXT: s_mov_b64 exec, s[62:63]
; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s65, v40, 17
+; GISEL-NEXT: v_readlane_b32 s64, v40, 16
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -659,70 +659,70 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: v_writelane_b32 v40, s50, 18
-; GCN-NEXT: v_writelane_b32 v40, s51, 19
-; GCN-NEXT: s_mov_b32 s42, s15
-; GCN-NEXT: s_mov_b32 s43, s14
-; GCN-NEXT: s_mov_b32 s44, s13
-; GCN-NEXT: s_mov_b32 s45, s12
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s64, 16
+; GCN-NEXT: v_writelane_b32 v40, s65, 17
+; GCN-NEXT: v_writelane_b32 v40, s66, 18
+; GCN-NEXT: v_writelane_b32 v40, s67, 19
+; GCN-NEXT: s_mov_b32 s50, s15
+; GCN-NEXT: s_mov_b32 s51, s14
+; GCN-NEXT: s_mov_b32 s52, s13
+; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
+; GCN-NEXT: s_and_saveexec_b64 s[62:63], vcc
; GCN-NEXT: s_cbranch_execz .LBB5_4
; GCN-NEXT: ; %bb.1: ; %bb1
-; GCN-NEXT: s_mov_b64 s[48:49], exec
+; GCN-NEXT: s_mov_b64 s[64:65], exec
; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc
-; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GCN-NEXT: s_and_saveexec_b64 s[66:67], vcc
+; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s45
-; GCN-NEXT: s_mov_b32 s13, s44
-; GCN-NEXT: s_mov_b32 s14, s43
-; GCN-NEXT: s_mov_b32 s15, s42
+; GCN-NEXT: s_mov_b32 s12, s53
+; GCN-NEXT: s_mov_b32 s13, s52
+; GCN-NEXT: s_mov_b32 s14, s51
+; GCN-NEXT: s_mov_b32 s15, s50
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
-; GCN-NEXT: s_xor_b64 exec, exec, s[50:51]
+; GCN-NEXT: s_xor_b64 exec, exec, s[66:67]
; GCN-NEXT: s_cbranch_execnz .LBB5_2
; GCN-NEXT: ; %bb.3:
-; GCN-NEXT: s_mov_b64 exec, s[48:49]
+; GCN-NEXT: s_mov_b64 exec, s[64:65]
; GCN-NEXT: .LBB5_4: ; %bb2
-; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s51, v40, 19
-; GCN-NEXT: v_readlane_b32 s50, v40, 18
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: s_or_b64 exec, exec, s[62:63]
+; GCN-NEXT: v_readlane_b32 s67, v40, 19
+; GCN-NEXT: v_readlane_b32 s66, v40, 18
+; GCN-NEXT: v_readlane_b32 s65, v40, 17
+; GCN-NEXT: v_readlane_b32 s64, v40, 16
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -754,70 +754,70 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: v_writelane_b32 v40, s50, 18
-; GISEL-NEXT: v_writelane_b32 v40, s51, 19
-; GISEL-NEXT: s_mov_b32 s42, s15
-; GISEL-NEXT: s_mov_b32 s43, s14
-; GISEL-NEXT: s_mov_b32 s44, s13
-; GISEL-NEXT: s_mov_b32 s45, s12
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s64, 16
+; GISEL-NEXT: v_writelane_b32 v40, s65, 17
+; GISEL-NEXT: v_writelane_b32 v40, s66, 18
+; GISEL-NEXT: v_writelane_b32 v40, s67, 19
+; GISEL-NEXT: s_mov_b32 s50, s15
+; GISEL-NEXT: s_mov_b32 s51, s14
+; GISEL-NEXT: s_mov_b32 s52, s13
+; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[62:63], vcc
; GISEL-NEXT: s_cbranch_execz .LBB5_4
; GISEL-NEXT: ; %bb.1: ; %bb1
-; GISEL-NEXT: s_mov_b64 s[48:49], exec
+; GISEL-NEXT: s_mov_b64 s[64:65], exec
; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc
-; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_and_saveexec_b64 s[66:67], vcc
+; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s45
-; GISEL-NEXT: s_mov_b32 s13, s44
-; GISEL-NEXT: s_mov_b32 s14, s43
-; GISEL-NEXT: s_mov_b32 s15, s42
+; GISEL-NEXT: s_mov_b32 s12, s53
+; GISEL-NEXT: s_mov_b32 s13, s52
+; GISEL-NEXT: s_mov_b32 s14, s51
+; GISEL-NEXT: s_mov_b32 s15, s50
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
-; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
+; GISEL-NEXT: s_xor_b64 exec, exec, s[66:67]
; GISEL-NEXT: s_cbranch_execnz .LBB5_2
; GISEL-NEXT: ; %bb.3:
-; GISEL-NEXT: s_mov_b64 exec, s[48:49]
+; GISEL-NEXT: s_mov_b64 exec, s[64:65]
; GISEL-NEXT: .LBB5_4: ; %bb2
-; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s51, v40, 19
-; GISEL-NEXT: v_readlane_b32 s50, v40, 18
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: s_or_b64 exec, exec, s[62:63]
+; GISEL-NEXT: v_readlane_b32 s67, v40, 19
+; GISEL-NEXT: v_readlane_b32 s66, v40, 18
+; GISEL-NEXT: v_readlane_b32 s65, v40, 17
+; GISEL-NEXT: v_readlane_b32 s64, v40, 16
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -859,32 +859,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: v_writelane_b32 v40, s50, 18
-; GCN-NEXT: v_writelane_b32 v40, s51, 19
-; GCN-NEXT: v_writelane_b32 v40, s52, 20
-; GCN-NEXT: v_writelane_b32 v40, s53, 21
-; GCN-NEXT: v_writelane_b32 v40, s54, 22
-; GCN-NEXT: v_writelane_b32 v40, s55, 23
-; GCN-NEXT: v_writelane_b32 v40, s56, 24
-; GCN-NEXT: v_writelane_b32 v40, s57, 25
-; GCN-NEXT: v_writelane_b32 v40, s58, 26
-; GCN-NEXT: v_writelane_b32 v40, s59, 27
-; GCN-NEXT: v_writelane_b32 v40, s60, 28
-; GCN-NEXT: v_writelane_b32 v40, s61, 29
-; GCN-NEXT: v_writelane_b32 v40, s62, 30
-; GCN-NEXT: v_writelane_b32 v40, s63, 31
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s8, v0
@@ -898,32 +882,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GCN-NEXT: s_cbranch_execnz .LBB6_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_readlane_b32 s63, v40, 31
-; GCN-NEXT: v_readlane_b32 s62, v40, 30
-; GCN-NEXT: v_readlane_b32 s61, v40, 29
-; GCN-NEXT: v_readlane_b32 s60, v40, 28
-; GCN-NEXT: v_readlane_b32 s59, v40, 27
-; GCN-NEXT: v_readlane_b32 s58, v40, 26
-; GCN-NEXT: v_readlane_b32 s57, v40, 25
-; GCN-NEXT: v_readlane_b32 s56, v40, 24
-; GCN-NEXT: v_readlane_b32 s55, v40, 23
-; GCN-NEXT: v_readlane_b32 s54, v40, 22
-; GCN-NEXT: v_readlane_b32 s53, v40, 21
-; GCN-NEXT: v_readlane_b32 s52, v40, 20
-; GCN-NEXT: v_readlane_b32 s51, v40, 19
-; GCN-NEXT: v_readlane_b32 s50, v40, 18
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -953,32 +921,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: v_writelane_b32 v40, s50, 18
-; GISEL-NEXT: v_writelane_b32 v40, s51, 19
-; GISEL-NEXT: v_writelane_b32 v40, s52, 20
-; GISEL-NEXT: v_writelane_b32 v40, s53, 21
-; GISEL-NEXT: v_writelane_b32 v40, s54, 22
-; GISEL-NEXT: v_writelane_b32 v40, s55, 23
-; GISEL-NEXT: v_writelane_b32 v40, s56, 24
-; GISEL-NEXT: v_writelane_b32 v40, s57, 25
-; GISEL-NEXT: v_writelane_b32 v40, s58, 26
-; GISEL-NEXT: v_writelane_b32 v40, s59, 27
-; GISEL-NEXT: v_writelane_b32 v40, s60, 28
-; GISEL-NEXT: v_writelane_b32 v40, s61, 29
-; GISEL-NEXT: v_writelane_b32 v40, s62, 30
-; GISEL-NEXT: v_writelane_b32 v40, s63, 31
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
; GISEL-NEXT: s_mov_b64 s[6:7], exec
; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v0
@@ -992,32 +944,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GISEL-NEXT: s_cbranch_execnz .LBB6_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
-; GISEL-NEXT: v_readlane_b32 s63, v40, 31
-; GISEL-NEXT: v_readlane_b32 s62, v40, 30
-; GISEL-NEXT: v_readlane_b32 s61, v40, 29
-; GISEL-NEXT: v_readlane_b32 s60, v40, 28
-; GISEL-NEXT: v_readlane_b32 s59, v40, 27
-; GISEL-NEXT: v_readlane_b32 s58, v40, 26
-; GISEL-NEXT: v_readlane_b32 s57, v40, 25
-; GISEL-NEXT: v_readlane_b32 s56, v40, 24
-; GISEL-NEXT: v_readlane_b32 s55, v40, 23
-; GISEL-NEXT: v_readlane_b32 s54, v40, 22
-; GISEL-NEXT: v_readlane_b32 s53, v40, 21
-; GISEL-NEXT: v_readlane_b32 s52, v40, 20
-; GISEL-NEXT: v_readlane_b32 s51, v40, 19
-; GISEL-NEXT: v_readlane_b32 s50, v40, 18
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -1052,32 +988,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v41, s35, 3
; GCN-NEXT: v_writelane_b32 v41, s36, 4
; GCN-NEXT: v_writelane_b32 v41, s37, 5
-; GCN-NEXT: v_writelane_b32 v41, s38, 6
-; GCN-NEXT: v_writelane_b32 v41, s39, 7
-; GCN-NEXT: v_writelane_b32 v41, s40, 8
-; GCN-NEXT: v_writelane_b32 v41, s41, 9
-; GCN-NEXT: v_writelane_b32 v41, s42, 10
-; GCN-NEXT: v_writelane_b32 v41, s43, 11
-; GCN-NEXT: v_writelane_b32 v41, s44, 12
-; GCN-NEXT: v_writelane_b32 v41, s45, 13
-; GCN-NEXT: v_writelane_b32 v41, s46, 14
-; GCN-NEXT: v_writelane_b32 v41, s47, 15
-; GCN-NEXT: v_writelane_b32 v41, s48, 16
-; GCN-NEXT: v_writelane_b32 v41, s49, 17
-; GCN-NEXT: v_writelane_b32 v41, s50, 18
-; GCN-NEXT: v_writelane_b32 v41, s51, 19
-; GCN-NEXT: v_writelane_b32 v41, s52, 20
-; GCN-NEXT: v_writelane_b32 v41, s53, 21
-; GCN-NEXT: v_writelane_b32 v41, s54, 22
-; GCN-NEXT: v_writelane_b32 v41, s55, 23
-; GCN-NEXT: v_writelane_b32 v41, s56, 24
-; GCN-NEXT: v_writelane_b32 v41, s57, 25
-; GCN-NEXT: v_writelane_b32 v41, s58, 26
-; GCN-NEXT: v_writelane_b32 v41, s59, 27
-; GCN-NEXT: v_writelane_b32 v41, s60, 28
-; GCN-NEXT: v_writelane_b32 v41, s61, 29
-; GCN-NEXT: v_writelane_b32 v41, s62, 30
-; GCN-NEXT: v_writelane_b32 v41, s63, 31
+; GCN-NEXT: v_writelane_b32 v41, s46, 6
+; GCN-NEXT: v_writelane_b32 v41, s47, 7
+; GCN-NEXT: v_writelane_b32 v41, s48, 8
+; GCN-NEXT: v_writelane_b32 v41, s49, 9
+; GCN-NEXT: v_writelane_b32 v41, s50, 10
+; GCN-NEXT: v_writelane_b32 v41, s51, 11
+; GCN-NEXT: v_writelane_b32 v41, s52, 12
+; GCN-NEXT: v_writelane_b32 v41, s53, 13
+; GCN-NEXT: v_writelane_b32 v41, s62, 14
+; GCN-NEXT: v_writelane_b32 v41, s63, 15
; GCN-NEXT: v_mov_b32_e32 v40, v0
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
@@ -1093,32 +1013,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v40
-; GCN-NEXT: v_readlane_b32 s63, v41, 31
-; GCN-NEXT: v_readlane_b32 s62, v41, 30
-; GCN-NEXT: v_readlane_b32 s61, v41, 29
-; GCN-NEXT: v_readlane_b32 s60, v41, 28
-; GCN-NEXT: v_readlane_b32 s59, v41, 27
-; GCN-NEXT: v_readlane_b32 s58, v41, 26
-; GCN-NEXT: v_readlane_b32 s57, v41, 25
-; GCN-NEXT: v_readlane_b32 s56, v41, 24
-; GCN-NEXT: v_readlane_b32 s55, v41, 23
-; GCN-NEXT: v_readlane_b32 s54, v41, 22
-; GCN-NEXT: v_readlane_b32 s53, v41, 21
-; GCN-NEXT: v_readlane_b32 s52, v41, 20
-; GCN-NEXT: v_readlane_b32 s51, v41, 19
-; GCN-NEXT: v_readlane_b32 s50, v41, 18
-; GCN-NEXT: v_readlane_b32 s49, v41, 17
-; GCN-NEXT: v_readlane_b32 s48, v41, 16
-; GCN-NEXT: v_readlane_b32 s47, v41, 15
-; GCN-NEXT: v_readlane_b32 s46, v41, 14
-; GCN-NEXT: v_readlane_b32 s45, v41, 13
-; GCN-NEXT: v_readlane_b32 s44, v41, 12
-; GCN-NEXT: v_readlane_b32 s43, v41, 11
-; GCN-NEXT: v_readlane_b32 s42, v41, 10
-; GCN-NEXT: v_readlane_b32 s41, v41, 9
-; GCN-NEXT: v_readlane_b32 s40, v41, 8
-; GCN-NEXT: v_readlane_b32 s39, v41, 7
-; GCN-NEXT: v_readlane_b32 s38, v41, 6
+; GCN-NEXT: v_readlane_b32 s63, v41, 15
+; GCN-NEXT: v_readlane_b32 s62, v41, 14
+; GCN-NEXT: v_readlane_b32 s53, v41, 13
+; GCN-NEXT: v_readlane_b32 s52, v41, 12
+; GCN-NEXT: v_readlane_b32 s51, v41, 11
+; GCN-NEXT: v_readlane_b32 s50, v41, 10
+; GCN-NEXT: v_readlane_b32 s49, v41, 9
+; GCN-NEXT: v_readlane_b32 s48, v41, 8
+; GCN-NEXT: v_readlane_b32 s47, v41, 7
+; GCN-NEXT: v_readlane_b32 s46, v41, 6
; GCN-NEXT: v_readlane_b32 s37, v41, 5
; GCN-NEXT: v_readlane_b32 s36, v41, 4
; GCN-NEXT: v_readlane_b32 s35, v41, 3
@@ -1150,32 +1054,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v41, s35, 3
; GISEL-NEXT: v_writelane_b32 v41, s36, 4
; GISEL-NEXT: v_writelane_b32 v41, s37, 5
-; GISEL-NEXT: v_writelane_b32 v41, s38, 6
-; GISEL-NEXT: v_writelane_b32 v41, s39, 7
-; GISEL-NEXT: v_writelane_b32 v41, s40, 8
-; GISEL-NEXT: v_writelane_b32 v41, s41, 9
-; GISEL-NEXT: v_writelane_b32 v41, s42, 10
-; GISEL-NEXT: v_writelane_b32 v41, s43, 11
-; GISEL-NEXT: v_writelane_b32 v41, s44, 12
-; GISEL-NEXT: v_writelane_b32 v41, s45, 13
-; GISEL-NEXT: v_writelane_b32 v41, s46, 14
-; GISEL-NEXT: v_writelane_b32 v41, s47, 15
-; GISEL-NEXT: v_writelane_b32 v41, s48, 16
-; GISEL-NEXT: v_writelane_b32 v41, s49, 17
-; GISEL-NEXT: v_writelane_b32 v41, s50, 18
-; GISEL-NEXT: v_writelane_b32 v41, s51, 19
-; GISEL-NEXT: v_writelane_b32 v41, s52, 20
-; GISEL-NEXT: v_writelane_b32 v41, s53, 21
-; GISEL-NEXT: v_writelane_b32 v41, s54, 22
-; GISEL-NEXT: v_writelane_b32 v41, s55, 23
-; GISEL-NEXT: v_writelane_b32 v41, s56, 24
-; GISEL-NEXT: v_writelane_b32 v41, s57, 25
-; GISEL-NEXT: v_writelane_b32 v41, s58, 26
-; GISEL-NEXT: v_writelane_b32 v41, s59, 27
-; GISEL-NEXT: v_writelane_b32 v41, s60, 28
-; GISEL-NEXT: v_writelane_b32 v41, s61, 29
-; GISEL-NEXT: v_writelane_b32 v41, s62, 30
-; GISEL-NEXT: v_writelane_b32 v41, s63, 31
+; GISEL-NEXT: v_writelane_b32 v41, s46, 6
+; GISEL-NEXT: v_writelane_b32 v41, s47, 7
+; GISEL-NEXT: v_writelane_b32 v41, s48, 8
+; GISEL-NEXT: v_writelane_b32 v41, s49, 9
+; GISEL-NEXT: v_writelane_b32 v41, s50, 10
+; GISEL-NEXT: v_writelane_b32 v41, s51, 11
+; GISEL-NEXT: v_writelane_b32 v41, s52, 12
+; GISEL-NEXT: v_writelane_b32 v41, s53, 13
+; GISEL-NEXT: v_writelane_b32 v41, s62, 14
+; GISEL-NEXT: v_writelane_b32 v41, s63, 15
; GISEL-NEXT: v_mov_b32_e32 v40, v0
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
@@ -1191,32 +1079,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v40
-; GISEL-NEXT: v_readlane_b32 s63, v41, 31
-; GISEL-NEXT: v_readlane_b32 s62, v41, 30
-; GISEL-NEXT: v_readlane_b32 s61, v41, 29
-; GISEL-NEXT: v_readlane_b32 s60, v41, 28
-; GISEL-NEXT: v_readlane_b32 s59, v41, 27
-; GISEL-NEXT: v_readlane_b32 s58, v41, 26
-; GISEL-NEXT: v_readlane_b32 s57, v41, 25
-; GISEL-NEXT: v_readlane_b32 s56, v41, 24
-; GISEL-NEXT: v_readlane_b32 s55, v41, 23
-; GISEL-NEXT: v_readlane_b32 s54, v41, 22
-; GISEL-NEXT: v_readlane_b32 s53, v41, 21
-; GISEL-NEXT: v_readlane_b32 s52, v41, 20
-; GISEL-NEXT: v_readlane_b32 s51, v41, 19
-; GISEL-NEXT: v_readlane_b32 s50, v41, 18
-; GISEL-NEXT: v_readlane_b32 s49, v41, 17
-; GISEL-NEXT: v_readlane_b32 s48, v41, 16
-; GISEL-NEXT: v_readlane_b32 s47, v41, 15
-; GISEL-NEXT: v_readlane_b32 s46, v41, 14
-; GISEL-NEXT: v_readlane_b32 s45, v41, 13
-; GISEL-NEXT: v_readlane_b32 s44, v41, 12
-; GISEL-NEXT: v_readlane_b32 s43, v41, 11
-; GISEL-NEXT: v_readlane_b32 s42, v41, 10
-; GISEL-NEXT: v_readlane_b32 s41, v41, 9
-; GISEL-NEXT: v_readlane_b32 s40, v41, 8
-; GISEL-NEXT: v_readlane_b32 s39, v41, 7
-; GISEL-NEXT: v_readlane_b32 s38, v41, 6
+; GISEL-NEXT: v_readlane_b32 s63, v41, 15
+; GISEL-NEXT: v_readlane_b32 s62, v41, 14
+; GISEL-NEXT: v_readlane_b32 s53, v41, 13
+; GISEL-NEXT: v_readlane_b32 s52, v41, 12
+; GISEL-NEXT: v_readlane_b32 s51, v41, 11
+; GISEL-NEXT: v_readlane_b32 s50, v41, 10
+; GISEL-NEXT: v_readlane_b32 s49, v41, 9
+; GISEL-NEXT: v_readlane_b32 s48, v41, 8
+; GISEL-NEXT: v_readlane_b32 s47, v41, 7
+; GISEL-NEXT: v_readlane_b32 s46, v41, 6
; GISEL-NEXT: v_readlane_b32 s37, v41, 5
; GISEL-NEXT: v_readlane_b32 s36, v41, 4
; GISEL-NEXT: v_readlane_b32 s35, v41, 3
@@ -1255,32 +1127,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: v_writelane_b32 v40, s50, 18
-; GCN-NEXT: v_writelane_b32 v40, s51, 19
-; GCN-NEXT: v_writelane_b32 v40, s52, 20
-; GCN-NEXT: v_writelane_b32 v40, s53, 21
-; GCN-NEXT: v_writelane_b32 v40, s54, 22
-; GCN-NEXT: v_writelane_b32 v40, s55, 23
-; GCN-NEXT: v_writelane_b32 v40, s56, 24
-; GCN-NEXT: v_writelane_b32 v40, s57, 25
-; GCN-NEXT: v_writelane_b32 v40, s58, 26
-; GCN-NEXT: v_writelane_b32 v40, s59, 27
-; GCN-NEXT: v_writelane_b32 v40, s60, 28
-; GCN-NEXT: v_writelane_b32 v40, s61, 29
-; GCN-NEXT: v_writelane_b32 v40, s62, 30
-; GCN-NEXT: v_writelane_b32 v40, s63, 31
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s8, v1
@@ -1296,32 +1152,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v3
-; GCN-NEXT: v_readlane_b32 s63, v40, 31
-; GCN-NEXT: v_readlane_b32 s62, v40, 30
-; GCN-NEXT: v_readlane_b32 s61, v40, 29
-; GCN-NEXT: v_readlane_b32 s60, v40, 28
-; GCN-NEXT: v_readlane_b32 s59, v40, 27
-; GCN-NEXT: v_readlane_b32 s58, v40, 26
-; GCN-NEXT: v_readlane_b32 s57, v40, 25
-; GCN-NEXT: v_readlane_b32 s56, v40, 24
-; GCN-NEXT: v_readlane_b32 s55, v40, 23
-; GCN-NEXT: v_readlane_b32 s54, v40, 22
-; GCN-NEXT: v_readlane_b32 s53, v40, 21
-; GCN-NEXT: v_readlane_b32 s52, v40, 20
-; GCN-NEXT: v_readlane_b32 s51, v40, 19
-; GCN-NEXT: v_readlane_b32 s50, v40, 18
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -1351,32 +1191,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: v_writelane_b32 v40, s50, 18
-; GISEL-NEXT: v_writelane_b32 v40, s51, 19
-; GISEL-NEXT: v_writelane_b32 v40, s52, 20
-; GISEL-NEXT: v_writelane_b32 v40, s53, 21
-; GISEL-NEXT: v_writelane_b32 v40, s54, 22
-; GISEL-NEXT: v_writelane_b32 v40, s55, 23
-; GISEL-NEXT: v_writelane_b32 v40, s56, 24
-; GISEL-NEXT: v_writelane_b32 v40, s57, 25
-; GISEL-NEXT: v_writelane_b32 v40, s58, 26
-; GISEL-NEXT: v_writelane_b32 v40, s59, 27
-; GISEL-NEXT: v_writelane_b32 v40, s60, 28
-; GISEL-NEXT: v_writelane_b32 v40, s61, 29
-; GISEL-NEXT: v_writelane_b32 v40, s62, 30
-; GISEL-NEXT: v_writelane_b32 v40, s63, 31
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
@@ -1392,32 +1216,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v2
-; GISEL-NEXT: v_readlane_b32 s63, v40, 31
-; GISEL-NEXT: v_readlane_b32 s62, v40, 30
-; GISEL-NEXT: v_readlane_b32 s61, v40, 29
-; GISEL-NEXT: v_readlane_b32 s60, v40, 28
-; GISEL-NEXT: v_readlane_b32 s59, v40, 27
-; GISEL-NEXT: v_readlane_b32 s58, v40, 26
-; GISEL-NEXT: v_readlane_b32 s57, v40, 25
-; GISEL-NEXT: v_readlane_b32 s56, v40, 24
-; GISEL-NEXT: v_readlane_b32 s55, v40, 23
-; GISEL-NEXT: v_readlane_b32 s54, v40, 22
-; GISEL-NEXT: v_readlane_b32 s53, v40, 21
-; GISEL-NEXT: v_readlane_b32 s52, v40, 20
-; GISEL-NEXT: v_readlane_b32 s51, v40, 19
-; GISEL-NEXT: v_readlane_b32 s50, v40, 18
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -1452,32 +1260,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s38, 6
-; GCN-NEXT: v_writelane_b32 v40, s39, 7
-; GCN-NEXT: v_writelane_b32 v40, s40, 8
-; GCN-NEXT: v_writelane_b32 v40, s41, 9
-; GCN-NEXT: v_writelane_b32 v40, s42, 10
-; GCN-NEXT: v_writelane_b32 v40, s43, 11
-; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s45, 13
-; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s47, 15
-; GCN-NEXT: v_writelane_b32 v40, s48, 16
-; GCN-NEXT: v_writelane_b32 v40, s49, 17
-; GCN-NEXT: v_writelane_b32 v40, s50, 18
-; GCN-NEXT: v_writelane_b32 v40, s51, 19
-; GCN-NEXT: v_writelane_b32 v40, s52, 20
-; GCN-NEXT: v_writelane_b32 v40, s53, 21
-; GCN-NEXT: v_writelane_b32 v40, s54, 22
-; GCN-NEXT: v_writelane_b32 v40, s55, 23
-; GCN-NEXT: v_writelane_b32 v40, s56, 24
-; GCN-NEXT: v_writelane_b32 v40, s57, 25
-; GCN-NEXT: v_writelane_b32 v40, s58, 26
-; GCN-NEXT: v_writelane_b32 v40, s59, 27
-; GCN-NEXT: v_writelane_b32 v40, s60, 28
-; GCN-NEXT: v_writelane_b32 v40, s61, 29
-; GCN-NEXT: v_writelane_b32 v40, s62, 30
-; GCN-NEXT: v_writelane_b32 v40, s63, 31
+; GCN-NEXT: v_writelane_b32 v40, s46, 6
+; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s48, 8
+; GCN-NEXT: v_writelane_b32 v40, s49, 9
+; GCN-NEXT: v_writelane_b32 v40, s50, 10
+; GCN-NEXT: v_writelane_b32 v40, s51, 11
+; GCN-NEXT: v_writelane_b32 v40, s52, 12
+; GCN-NEXT: v_writelane_b32 v40, s53, 13
+; GCN-NEXT: v_writelane_b32 v40, s62, 14
+; GCN-NEXT: v_writelane_b32 v40, s63, 15
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s6, v0
@@ -1490,32 +1282,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: s_cbranch_execnz .LBB9_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_readlane_b32 s63, v40, 31
-; GCN-NEXT: v_readlane_b32 s62, v40, 30
-; GCN-NEXT: v_readlane_b32 s61, v40, 29
-; GCN-NEXT: v_readlane_b32 s60, v40, 28
-; GCN-NEXT: v_readlane_b32 s59, v40, 27
-; GCN-NEXT: v_readlane_b32 s58, v40, 26
-; GCN-NEXT: v_readlane_b32 s57, v40, 25
-; GCN-NEXT: v_readlane_b32 s56, v40, 24
-; GCN-NEXT: v_readlane_b32 s55, v40, 23
-; GCN-NEXT: v_readlane_b32 s54, v40, 22
-; GCN-NEXT: v_readlane_b32 s53, v40, 21
-; GCN-NEXT: v_readlane_b32 s52, v40, 20
-; GCN-NEXT: v_readlane_b32 s51, v40, 19
-; GCN-NEXT: v_readlane_b32 s50, v40, 18
-; GCN-NEXT: v_readlane_b32 s49, v40, 17
-; GCN-NEXT: v_readlane_b32 s48, v40, 16
-; GCN-NEXT: v_readlane_b32 s47, v40, 15
-; GCN-NEXT: v_readlane_b32 s46, v40, 14
-; GCN-NEXT: v_readlane_b32 s45, v40, 13
-; GCN-NEXT: v_readlane_b32 s44, v40, 12
-; GCN-NEXT: v_readlane_b32 s43, v40, 11
-; GCN-NEXT: v_readlane_b32 s42, v40, 10
-; GCN-NEXT: v_readlane_b32 s41, v40, 9
-; GCN-NEXT: v_readlane_b32 s40, v40, 8
-; GCN-NEXT: v_readlane_b32 s39, v40, 7
-; GCN-NEXT: v_readlane_b32 s38, v40, 6
+; GCN-NEXT: v_readlane_b32 s63, v40, 15
+; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s53, v40, 13
+; GCN-NEXT: v_readlane_b32 s52, v40, 12
+; GCN-NEXT: v_readlane_b32 s51, v40, 11
+; GCN-NEXT: v_readlane_b32 s50, v40, 10
+; GCN-NEXT: v_readlane_b32 s49, v40, 9
+; GCN-NEXT: v_readlane_b32 s48, v40, 8
+; GCN-NEXT: v_readlane_b32 s47, v40, 7
+; GCN-NEXT: v_readlane_b32 s46, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -1545,32 +1321,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s38, 6
-; GISEL-NEXT: v_writelane_b32 v40, s39, 7
-; GISEL-NEXT: v_writelane_b32 v40, s40, 8
-; GISEL-NEXT: v_writelane_b32 v40, s41, 9
-; GISEL-NEXT: v_writelane_b32 v40, s42, 10
-; GISEL-NEXT: v_writelane_b32 v40, s43, 11
-; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s45, 13
-; GISEL-NEXT: v_writelane_b32 v40, s46, 14
-; GISEL-NEXT: v_writelane_b32 v40, s47, 15
-; GISEL-NEXT: v_writelane_b32 v40, s48, 16
-; GISEL-NEXT: v_writelane_b32 v40, s49, 17
-; GISEL-NEXT: v_writelane_b32 v40, s50, 18
-; GISEL-NEXT: v_writelane_b32 v40, s51, 19
-; GISEL-NEXT: v_writelane_b32 v40, s52, 20
-; GISEL-NEXT: v_writelane_b32 v40, s53, 21
-; GISEL-NEXT: v_writelane_b32 v40, s54, 22
-; GISEL-NEXT: v_writelane_b32 v40, s55, 23
-; GISEL-NEXT: v_writelane_b32 v40, s56, 24
-; GISEL-NEXT: v_writelane_b32 v40, s57, 25
-; GISEL-NEXT: v_writelane_b32 v40, s58, 26
-; GISEL-NEXT: v_writelane_b32 v40, s59, 27
-; GISEL-NEXT: v_writelane_b32 v40, s60, 28
-; GISEL-NEXT: v_writelane_b32 v40, s61, 29
-; GISEL-NEXT: v_writelane_b32 v40, s62, 30
-; GISEL-NEXT: v_writelane_b32 v40, s63, 31
+; GISEL-NEXT: v_writelane_b32 v40, s46, 6
+; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s48, 8
+; GISEL-NEXT: v_writelane_b32 v40, s49, 9
+; GISEL-NEXT: v_writelane_b32 v40, s50, 10
+; GISEL-NEXT: v_writelane_b32 v40, s51, 11
+; GISEL-NEXT: v_writelane_b32 v40, s52, 12
+; GISEL-NEXT: v_writelane_b32 v40, s53, 13
+; GISEL-NEXT: v_writelane_b32 v40, s62, 14
+; GISEL-NEXT: v_writelane_b32 v40, s63, 15
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s6, v0
@@ -1583,32 +1343,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: s_cbranch_execnz .LBB9_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
-; GISEL-NEXT: v_readlane_b32 s63, v40, 31
-; GISEL-NEXT: v_readlane_b32 s62, v40, 30
-; GISEL-NEXT: v_readlane_b32 s61, v40, 29
-; GISEL-NEXT: v_readlane_b32 s60, v40, 28
-; GISEL-NEXT: v_readlane_b32 s59, v40, 27
-; GISEL-NEXT: v_readlane_b32 s58, v40, 26
-; GISEL-NEXT: v_readlane_b32 s57, v40, 25
-; GISEL-NEXT: v_readlane_b32 s56, v40, 24
-; GISEL-NEXT: v_readlane_b32 s55, v40, 23
-; GISEL-NEXT: v_readlane_b32 s54, v40, 22
-; GISEL-NEXT: v_readlane_b32 s53, v40, 21
-; GISEL-NEXT: v_readlane_b32 s52, v40, 20
-; GISEL-NEXT: v_readlane_b32 s51, v40, 19
-; GISEL-NEXT: v_readlane_b32 s50, v40, 18
-; GISEL-NEXT: v_readlane_b32 s49, v40, 17
-; GISEL-NEXT: v_readlane_b32 s48, v40, 16
-; GISEL-NEXT: v_readlane_b32 s47, v40, 15
-; GISEL-NEXT: v_readlane_b32 s46, v40, 14
-; GISEL-NEXT: v_readlane_b32 s45, v40, 13
-; GISEL-NEXT: v_readlane_b32 s44, v40, 12
-; GISEL-NEXT: v_readlane_b32 s43, v40, 11
-; GISEL-NEXT: v_readlane_b32 s42, v40, 10
-; GISEL-NEXT: v_readlane_b32 s41, v40, 9
-; GISEL-NEXT: v_readlane_b32 s40, v40, 8
-; GISEL-NEXT: v_readlane_b32 s39, v40, 7
-; GISEL-NEXT: v_readlane_b32 s38, v40, 6
+; GISEL-NEXT: v_readlane_b32 s63, v40, 15
+; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s53, v40, 13
+; GISEL-NEXT: v_readlane_b32 s52, v40, 12
+; GISEL-NEXT: v_readlane_b32 s51, v40, 11
+; GISEL-NEXT: v_readlane_b32 s50, v40, 10
+; GISEL-NEXT: v_readlane_b32 s49, v40, 9
+; GISEL-NEXT: v_readlane_b32 s48, v40, 8
+; GISEL-NEXT: v_readlane_b32 s47, v40, 7
+; GISEL-NEXT: v_readlane_b32 s46, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir
index e272bd3480383..55de5dd133700 100644
--- a/llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,7 @@
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
# CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
---
name: issue48473
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index dbe95a8091932..4fd9fc95b8532 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -113,20 +113,20 @@ exit:
define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
; GFX9-SDAG-LABEL: test_call:
; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-SDAG-NEXT: s_mov_b32 s38, -1
-; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s2
-; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-SDAG-NEXT: s_mov_b32 s50, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s2
+; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0
; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1]
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-SDAG-NEXT: s_mov_b32 s6, src_pops_exiting_wave_id
-; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], 36
-; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -135,20 +135,20 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
;
; GFX9-GISEL-LABEL: test_call:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-GISEL-NEXT: s_mov_b32 s38, -1
-; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s2
-; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-GISEL-NEXT: s_mov_b32 s50, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s2
+; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0
; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1]
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], 36
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -157,12 +157,12 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
;
; GFX10-LABEL: test_call:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s2
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s2
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
@@ -171,8 +171,8 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index 584dd2700c419..c6a412a9f88b0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -1727,14 +1727,9 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-LABEL: v_maximum_v16f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
; GFX7-NEXT: v_max_f32_e32 v1, v1, v17
; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX7-NEXT: v_writelane_b32 v31, s30, 0
-; GFX7-NEXT: v_writelane_b32 v31, s31, 1
; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
; GFX7-NEXT: v_max_f32_e32 v2, v2, v18
; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
@@ -1743,7 +1738,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_max_f32_e32 v19, v0, v16
; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX7-NEXT: v_max_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1765,7 +1760,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX7-NEXT: v_max_f32_e32 v13, v13, v29
; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1779,29 +1774,18 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
; GFX7-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
; GFX7-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
-; GFX7-NEXT: v_readlane_b32 s31, v31, 1
-; GFX7-NEXT: v_readlane_b32 s30, v31, 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_max_f32_e32 v16, v15, v17
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
; GFX7-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
; GFX8-NEXT: v_max_f32_e32 v1, v1, v17
; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX8-NEXT: v_writelane_b32 v31, s30, 0
-; GFX8-NEXT: v_writelane_b32 v31, s31, 1
; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
; GFX8-NEXT: v_max_f32_e32 v2, v2, v18
; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
@@ -1810,7 +1794,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_max_f32_e32 v19, v0, v16
; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX8-NEXT: v_max_f32_e32 v16, v14, v30
-; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX8-NEXT: v_max_f32_e32 v4, v4, v20
; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1832,7 +1816,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX8-NEXT: v_max_f32_e32 v13, v13, v29
; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1846,29 +1830,18 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
; GFX8-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
; GFX8-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
-; GFX8-NEXT: v_readlane_b32 s31, v31, 1
-; GFX8-NEXT: v_readlane_b32 s30, v31, 0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v16, v15, v17
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
; GFX8-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_maximum_v16f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
; GFX900-NEXT: v_max_f32_e32 v1, v1, v17
; GFX900-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX900-NEXT: v_writelane_b32 v31, s30, 0
-; GFX900-NEXT: v_writelane_b32 v31, s31, 1
; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
; GFX900-NEXT: v_max_f32_e32 v2, v2, v18
; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
@@ -1877,7 +1850,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_max_f32_e32 v19, v0, v16
; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX900-NEXT: v_max_f32_e32 v16, v14, v30
-; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX900-NEXT: v_max_f32_e32 v4, v4, v20
; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1899,7 +1872,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX900-NEXT: v_max_f32_e32 v13, v13, v29
; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1913,16 +1886,10 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
; GFX900-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
; GFX900-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
-; GFX900-NEXT: v_readlane_b32 s31, v31, 1
-; GFX900-NEXT: v_readlane_b32 s30, v31, 0
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_max_f32_e32 v16, v15, v17
; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
; GFX900-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_maximum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index e354ec6fb3dd7..f7ce72efa4373 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -2008,15 +2008,8 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-LABEL: v_maximum_v16f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: v_writelane_b32 v34, s30, 0
-; GFX7-NEXT: v_writelane_b32 v34, s31, 1
-; GFX7-NEXT: v_writelane_b32 v34, s34, 2
-; GFX7-NEXT: v_writelane_b32 v34, s35, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
@@ -2102,14 +2095,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2126,31 +2119,16 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX7-NEXT: v_readlane_b32 s35, v34, 3
-; GFX7-NEXT: v_readlane_b32 s34, v34, 2
-; GFX7-NEXT: v_readlane_b32 s31, v34, 1
-; GFX7-NEXT: v_readlane_b32 s30, v34, 0
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT: v_writelane_b32 v34, s30, 0
-; GFX8-NEXT: v_writelane_b32 v34, s31, 1
-; GFX8-NEXT: v_writelane_b32 v34, s34, 2
-; GFX8-NEXT: v_writelane_b32 v34, s35, 3
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
@@ -2236,14 +2214,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2260,31 +2238,16 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX8-NEXT: v_readlane_b32 s35, v34, 3
-; GFX8-NEXT: v_readlane_b32 s34, v34, 2
-; GFX8-NEXT: v_readlane_b32 s31, v34, 1
-; GFX8-NEXT: v_readlane_b32 s30, v34, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_maximum_v16f64:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX900-NEXT: v_writelane_b32 v34, s30, 0
-; GFX900-NEXT: v_writelane_b32 v34, s31, 1
-; GFX900-NEXT: v_writelane_b32 v34, s34, 2
-; GFX900-NEXT: v_writelane_b32 v34, s35, 3
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32]
@@ -2370,14 +2333,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2394,17 +2357,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX900-NEXT: v_readlane_b32 s35, v34, 3
-; GFX900-NEXT: v_readlane_b32 s34, v34, 2
-; GFX900-NEXT: v_readlane_b32 s31, v34, 1
-; GFX900-NEXT: v_readlane_b32 s30, v34, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 9962433134073..7fe4f9be2727d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -1727,14 +1727,9 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-LABEL: v_minimum_v16f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
; GFX7-NEXT: v_min_f32_e32 v1, v1, v17
; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX7-NEXT: v_writelane_b32 v31, s30, 0
-; GFX7-NEXT: v_writelane_b32 v31, s31, 1
; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
; GFX7-NEXT: v_min_f32_e32 v2, v2, v18
; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
@@ -1743,7 +1738,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_min_f32_e32 v19, v0, v16
; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX7-NEXT: v_min_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX7-NEXT: v_min_f32_e32 v4, v4, v20
; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1765,7 +1760,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX7-NEXT: v_min_f32_e32 v13, v13, v29
; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1779,29 +1774,18 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
; GFX7-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
; GFX7-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
-; GFX7-NEXT: v_readlane_b32 s31, v31, 1
-; GFX7-NEXT: v_readlane_b32 s30, v31, 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_min_f32_e32 v16, v15, v17
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
; GFX7-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
; GFX8-NEXT: v_min_f32_e32 v1, v1, v17
; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX8-NEXT: v_writelane_b32 v31, s30, 0
-; GFX8-NEXT: v_writelane_b32 v31, s31, 1
; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
; GFX8-NEXT: v_min_f32_e32 v2, v2, v18
; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
@@ -1810,7 +1794,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_min_f32_e32 v19, v0, v16
; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX8-NEXT: v_min_f32_e32 v16, v14, v30
-; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX8-NEXT: v_min_f32_e32 v4, v4, v20
; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1832,7 +1816,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX8-NEXT: v_min_f32_e32 v13, v13, v29
; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1846,29 +1830,18 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
; GFX8-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
; GFX8-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
-; GFX8-NEXT: v_readlane_b32 s31, v31, 1
-; GFX8-NEXT: v_readlane_b32 s30, v31, 0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_min_f32_e32 v16, v15, v17
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
; GFX8-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v16f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17
; GFX900-NEXT: v_min_f32_e32 v1, v1, v17
; GFX900-NEXT: buffer_load_dword v17, off, s[0:3], s32
-; GFX900-NEXT: v_writelane_b32 v31, s30, 0
-; GFX900-NEXT: v_writelane_b32 v31, s31, 1
; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18
; GFX900-NEXT: v_min_f32_e32 v2, v2, v18
; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19
@@ -1877,7 +1850,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_min_f32_e32 v19, v0, v16
; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX900-NEXT: v_min_f32_e32 v16, v14, v30
-; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX900-NEXT: v_min_f32_e32 v4, v4, v20
; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1899,7 +1872,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX900-NEXT: v_min_f32_e32 v13, v13, v29
; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1913,16 +1886,10 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23]
; GFX900-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25]
; GFX900-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27]
-; GFX900-NEXT: v_readlane_b32 s31, v31, 1
-; GFX900-NEXT: v_readlane_b32 s30, v31, 0
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_min_f32_e32 v16, v15, v17
; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v17
; GFX900-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v16f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index 71fdd691a1512..ab20fd88091d9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -2008,15 +2008,8 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-LABEL: v_minimum_v16f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX7-NEXT: v_writelane_b32 v34, s30, 0
-; GFX7-NEXT: v_writelane_b32 v34, s31, 1
-; GFX7-NEXT: v_writelane_b32 v34, s34, 2
-; GFX7-NEXT: v_writelane_b32 v34, s35, 3
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
@@ -2102,14 +2095,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2126,31 +2119,16 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX7-NEXT: v_readlane_b32 s35, v34, 3
-; GFX7-NEXT: v_readlane_b32 s34, v34, 2
-; GFX7-NEXT: v_readlane_b32 s31, v34, 1
-; GFX7-NEXT: v_readlane_b32 s30, v34, 0
-; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX7-NEXT: s_mov_b64 exec, s[4:5]
-; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX8-NEXT: v_writelane_b32 v34, s30, 0
-; GFX8-NEXT: v_writelane_b32 v34, s31, 1
-; GFX8-NEXT: v_writelane_b32 v34, s34, 2
-; GFX8-NEXT: v_writelane_b32 v34, s35, 3
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
@@ -2236,14 +2214,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2260,31 +2238,16 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX8-NEXT: v_readlane_b32 s35, v34, 3
-; GFX8-NEXT: v_readlane_b32 s34, v34, 2
-; GFX8-NEXT: v_readlane_b32 s31, v34, 1
-; GFX8-NEXT: v_readlane_b32 s30, v34, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v16f64:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4
-; GFX900-NEXT: v_writelane_b32 v34, s30, 0
-; GFX900-NEXT: v_writelane_b32 v34, s31, 1
-; GFX900-NEXT: v_writelane_b32 v34, s34, 2
-; GFX900-NEXT: v_writelane_b32 v34, s35, 3
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32]
; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32]
@@ -2370,14 +2333,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31]
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2394,17 +2357,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31]
-; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35]
-; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35]
-; GFX900-NEXT: v_readlane_b32 s35, v34, 3
-; GFX900-NEXT: v_readlane_b32 s34, v34, 2
-; GFX900-NEXT: v_readlane_b32 s31, v34, 1
-; GFX900-NEXT: v_readlane_b32 s30, v34, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 989ef6f981d9d..55e1c3842aa6f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -68,12 +68,12 @@ define amdgpu_kernel void @workgroup_ids_kernel() {
define amdgpu_kernel void @caller() {
; GFX9-SDAG-LABEL: caller:
; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-SDAG-NEXT: s_mov_b32 s38, -1
-; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s11
-; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-SDAG-NEXT: s_mov_b32 s50, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s11
+; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0
; GFX9-SDAG-NEXT: s_mov_b32 s12, s8
; GFX9-SDAG-NEXT: s_add_u32 s8, s4, 36
; GFX9-SDAG-NEXT: s_addc_u32 s9, s5, 0
@@ -86,9 +86,9 @@ define amdgpu_kernel void @caller() {
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -97,12 +97,12 @@ define amdgpu_kernel void @caller() {
;
; GFX9-GISEL-LABEL: caller:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-GISEL-NEXT: s_mov_b32 s38, -1
-; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s11
-; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-GISEL-NEXT: s_mov_b32 s50, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s11
+; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0
; GFX9-GISEL-NEXT: s_mov_b32 s14, s8
; GFX9-GISEL-NEXT: s_add_u32 s8, s4, 36
; GFX9-GISEL-NEXT: s_addc_u32 s9, s5, 0
@@ -115,10 +115,10 @@ define amdgpu_kernel void @caller() {
; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s14
-; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13]
; GFX9-GISEL-NEXT: s_mov_b32 s12, s14
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
@@ -128,12 +128,12 @@ define amdgpu_kernel void @caller() {
;
; GFX9ARCH-SDAG-LABEL: caller:
; GFX9ARCH-SDAG: ; %bb.0:
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s8
-; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s50, -1
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9ARCH-SDAG-NEXT: s_add_u32 s48, s48, s8
+; GFX9ARCH-SDAG-NEXT: s_addc_u32 s49, s49, 0
; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s4, 36
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s5, 0
; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[4:5]
@@ -145,9 +145,9 @@ define amdgpu_kernel void @caller() {
; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0
; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -156,12 +156,12 @@ define amdgpu_kernel void @caller() {
;
; GFX9ARCH-GISEL-LABEL: caller:
; GFX9ARCH-GISEL: ; %bb.0:
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s8
-; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s50, -1
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9ARCH-GISEL-NEXT: s_add_u32 s48, s48, s8
+; GFX9ARCH-GISEL-NEXT: s_addc_u32 s49, s49, 0
; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s4, 36
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s5, 0
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[12:13], s[0:1]
@@ -173,10 +173,10 @@ define amdgpu_kernel void @caller() {
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13]
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0
; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index acb706cee04d0..d29e6f8c3d2c6 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -43,28 +43,28 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx8 s[48:55], s[8:9], 0x0
+; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_mov_b32_e32 v40, v0
-; CHECK-NEXT: s_add_u32 s44, s34, 40
+; CHECK-NEXT: s_add_u32 s52, s34, 40
; CHECK-NEXT: v_mov_b32_e32 v31, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s33, s16
-; CHECK-NEXT: s_addc_u32 s45, s35, 0
-; CHECK-NEXT: s_mov_b32 s43, s14
+; CHECK-NEXT: s_addc_u32 s53, s35, 0
+; CHECK-NEXT: s_mov_b32 s51, s14
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b32 s12, s14
; CHECK-NEXT: s_mov_b32 s13, s15
; CHECK-NEXT: s_mov_b32 s14, s33
-; CHECK-NEXT: s_mov_b32 s42, s15
+; CHECK-NEXT: s_mov_b32 s50, s15
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v45, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v43, v0
@@ -73,12 +73,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v41, v0
@@ -87,12 +87,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: ds_write_b32 v45, v45 offset:15360
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -102,22 +102,22 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: global_load_dword v0, v0, s[52:53]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: global_load_dword v0, v0, s[100:101]
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v42, v0
-; CHECK-NEXT: s_mov_b32 s44, exec_lo
+; CHECK-NEXT: s_mov_b32 s52, exec_lo
; CHECK-NEXT: v_cmpx_ne_u32_e32 0, v42
; CHECK-NEXT: s_cbranch_execz .LBB0_25
; CHECK-NEXT: ; %bb.1: ; %.preheader5
@@ -136,7 +136,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4
; CHECK-NEXT: v_add_nc_u32_e32 v45, -1, v42
-; CHECK-NEXT: s_mov_b32 s45, 0
+; CHECK-NEXT: s_mov_b32 s53, 0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v45
; CHECK-NEXT: s_and_b32 exec_lo, exec_lo, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB0_25
@@ -144,46 +144,46 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v43, 10, v43
; CHECK-NEXT: v_add_nc_u32_e32 v46, 0x3c05, v0
; CHECK-NEXT: v_mov_b32_e32 v47, 0
-; CHECK-NEXT: s_mov_b32 s47, 0
+; CHECK-NEXT: s_mov_b32 s63, 0
; CHECK-NEXT: .LBB0_5: ; =>This Loop Header: Depth=1
; CHECK-NEXT: ; Child Loop BB0_8 Depth 2
; CHECK-NEXT: ; Child Loop BB0_20 Depth 2
-; CHECK-NEXT: v_add_nc_u32_e32 v0, s47, v44
-; CHECK-NEXT: s_lshl_b32 s4, s47, 5
-; CHECK-NEXT: s_add_i32 s46, s47, 1
-; CHECK-NEXT: s_add_i32 s5, s47, 5
-; CHECK-NEXT: v_or3_b32 v57, s4, v43, s46
+; CHECK-NEXT: v_add_nc_u32_e32 v0, s63, v44
+; CHECK-NEXT: s_lshl_b32 s4, s63, 5
+; CHECK-NEXT: s_add_i32 s62, s63, 1
+; CHECK-NEXT: s_add_i32 s5, s63, 5
+; CHECK-NEXT: v_or3_b32 v57, s4, v43, s62
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_read_u8 v56, v0
-; CHECK-NEXT: v_mov_b32_e32 v58, s46
-; CHECK-NEXT: s_mov_b32 s52, exec_lo
+; CHECK-NEXT: v_mov_b32_e32 v58, s62
+; CHECK-NEXT: s_mov_b32 s64, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 s5, v42
; CHECK-NEXT: s_cbranch_execz .LBB0_17
; CHECK-NEXT: ; %bb.6: ; %.preheader2
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_mov_b32 s53, 0
-; CHECK-NEXT: s_mov_b32 s56, 0
+; CHECK-NEXT: s_mov_b32 s65, 0
+; CHECK-NEXT: s_mov_b32 s66, 0
; CHECK-NEXT: s_branch .LBB0_8
; CHECK-NEXT: .LBB0_7: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
-; CHECK-NEXT: s_add_i32 s56, s56, 4
-; CHECK-NEXT: s_add_i32 s4, s47, s56
-; CHECK-NEXT: v_add_nc_u32_e32 v0, s56, v57
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
+; CHECK-NEXT: s_add_i32 s66, s66, 4
+; CHECK-NEXT: s_add_i32 s4, s63, s66
+; CHECK-NEXT: v_add_nc_u32_e32 v0, s66, v57
; CHECK-NEXT: s_add_i32 s5, s4, 5
; CHECK-NEXT: s_add_i32 s4, s4, 1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s5, v42
; CHECK-NEXT: v_mov_b32_e32 v58, s4
-; CHECK-NEXT: s_or_b32 s53, vcc_lo, s53
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s53
+; CHECK-NEXT: s_or_b32 s65, vcc_lo, s65
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s65
; CHECK-NEXT: s_cbranch_execz .LBB0_16
; CHECK-NEXT: .LBB0_8: ; Parent Loop BB0_5 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
-; CHECK-NEXT: v_add_nc_u32_e32 v59, s56, v46
-; CHECK-NEXT: v_add_nc_u32_e32 v58, s56, v57
+; CHECK-NEXT: v_add_nc_u32_e32 v59, s66, v46
+; CHECK-NEXT: v_add_nc_u32_e32 v58, s66, v57
; CHECK-NEXT: ds_read_u8 v0, v59
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s57, s4
+; CHECK-NEXT: s_and_saveexec_b32 s67, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.9: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -193,22 +193,22 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v58
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
; CHECK-NEXT: ds_read_u8 v0, v59 offset:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s57, s4
+; CHECK-NEXT: s_and_saveexec_b32 s67, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_12
; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -218,11 +218,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v60, 1, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
@@ -230,11 +230,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v60
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
; CHECK-NEXT: ds_read_u8 v0, v59 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s57, s4
+; CHECK-NEXT: s_and_saveexec_b32 s67, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_14
; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -244,11 +244,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v60, 2, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
@@ -256,11 +256,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v60
; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
; CHECK-NEXT: ds_read_u8 v0, v59 offset:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s57, s4
+; CHECK-NEXT: s_and_saveexec_b32 s67, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_7
; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -270,11 +270,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v58
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
@@ -284,27 +284,27 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_16: ; %Flow45
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65
; CHECK-NEXT: v_mov_b32_e32 v57, v0
; CHECK-NEXT: .LBB0_17: ; %Flow46
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
-; CHECK-NEXT: s_mov_b32 s47, exec_lo
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64
+; CHECK-NEXT: s_mov_b32 s63, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 v58, v42
; CHECK-NEXT: s_cbranch_execz .LBB0_23
; CHECK-NEXT: ; %bb.18: ; %.preheader
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_mov_b32 s52, 0
+; CHECK-NEXT: s_mov_b32 s64, 0
; CHECK-NEXT: s_inst_prefetch 0x1
; CHECK-NEXT: s_branch .LBB0_20
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB0_19: ; in Loop: Header=BB0_20 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65
; CHECK-NEXT: v_add_nc_u32_e32 v58, 1, v58
; CHECK-NEXT: v_add_nc_u32_e32 v57, 1, v57
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v58, v42
-; CHECK-NEXT: s_or_b32 s52, vcc_lo, s52
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s52
+; CHECK-NEXT: s_or_b32 s64, vcc_lo, s64
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s64
; CHECK-NEXT: s_cbranch_execz .LBB0_22
; CHECK-NEXT: .LBB0_20: ; Parent Loop BB0_5 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
@@ -312,7 +312,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: ds_read_u8 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s53, s4
+; CHECK-NEXT: s_and_saveexec_b32 s65, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_19
; CHECK-NEXT: ; %bb.21: ; in Loop: Header=BB0_20 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -322,11 +322,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -336,22 +336,22 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: .LBB0_22: ; %Flow43
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: s_inst_prefetch 0x2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64
; CHECK-NEXT: .LBB0_23: ; %Flow44
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s47
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63
; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s46, v45
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s62, v45
; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47
; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46
-; CHECK-NEXT: s_mov_b32 s47, s46
+; CHECK-NEXT: s_mov_b32 s63, s62
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_and_b32 s4, exec_lo, s4
-; CHECK-NEXT: s_or_b32 s45, s4, s45
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s45
+; CHECK-NEXT: s_or_b32 s53, s4, s53
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s53
; CHECK-NEXT: s_cbranch_execnz .LBB0_5
; CHECK-NEXT: .LBB0_25: ; %Flow51
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s44
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -359,11 +359,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
@@ -373,10 +373,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41
; CHECK-NEXT: s_cbranch_execz .LBB0_33
; CHECK-NEXT: ; %bb.26:
-; CHECK-NEXT: s_mov_b32 s44, 0
+; CHECK-NEXT: s_mov_b32 s52, 0
; CHECK-NEXT: s_branch .LBB0_28
; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_add_u32 s8, s34, 40
@@ -384,21 +384,21 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41
-; CHECK-NEXT: s_or_b32 s44, vcc_lo, s44
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
+; CHECK-NEXT: s_or_b32 s52, vcc_lo, s52
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s52
; CHECK-NEXT: s_cbranch_execz .LBB0_33
; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41
-; CHECK-NEXT: s_mov_b32 s45, exec_lo
+; CHECK-NEXT: s_mov_b32 s53, exec_lo
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0
@@ -407,8 +407,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
-; CHECK-NEXT: v_add_co_u32 v2, s4, s48, v1
-; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s49, 0, s4
+; CHECK-NEXT: v_add_co_u32 v2, s4, s96, v1
+; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s97, 0, s4
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
@@ -443,10 +443,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_addPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_addPU3AS1Vjj@rel32@hi+12
; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73
@@ -454,11 +454,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0
; CHECK-NEXT: v_lshlrev_b32_e64 v44, v1, 1
; CHECK-NEXT: v_and_b32_e32 v74, 28, v1
-; CHECK-NEXT: v_add_co_u32 v42, s4, s54, v0
-; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s55, 0, s4
+; CHECK-NEXT: v_add_co_u32 v42, s4, s102, v0
+; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s103, 0, s4
; CHECK-NEXT: v_mov_b32_e32 v2, v44
; CHECK-NEXT: v_mov_b32_e32 v0, v42
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mov_b32_e32 v1, v43
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_bfe_u32 v0, v0, v74, 4
@@ -469,7 +469,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_28 Depth=1
; CHECK-NEXT: v_xor_b32_e32 v4, v60, v58
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 16, v[56:57]
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[50:51]
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[98:99]
; CHECK-NEXT: v_lshlrev_b32_e32 v10, 5, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 6, v72
@@ -503,11 +503,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_branch .LBB0_27
@@ -792,28 +792,28 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx2 s[46:47], s[8:9], 0x10
+; CHECK-NEXT: s_load_dwordx2 s[62:63], s[8:9], 0x10
; CHECK-NEXT: s_add_u32 s0, s0, s17
-; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
+; CHECK-NEXT: s_mov_b64 s[46:47], s[8:9]
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_mov_b32_e32 v40, v0
-; CHECK-NEXT: s_add_u32 s44, s38, 40
+; CHECK-NEXT: s_add_u32 s52, s46, 40
; CHECK-NEXT: v_mov_b32_e32 v31, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s33, s16
-; CHECK-NEXT: s_addc_u32 s45, s39, 0
-; CHECK-NEXT: s_mov_b32 s43, s14
+; CHECK-NEXT: s_addc_u32 s53, s47, 0
+; CHECK-NEXT: s_mov_b32 s51, s14
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b32 s12, s14
; CHECK-NEXT: s_mov_b32 s13, s15
; CHECK-NEXT: s_mov_b32 s14, s33
-; CHECK-NEXT: s_mov_b32 s42, s15
+; CHECK-NEXT: s_mov_b32 s50, s15
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[6:7]
-; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v43, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v42, v0
@@ -822,12 +822,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mul_lo_u32 v46, v0, 14
@@ -836,12 +836,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: ds_write_b32 v43, v43 offset:15360
; CHECK-NEXT: v_add_nc_u32_e32 v44, 0x3c04, v46
@@ -852,15 +852,15 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: global_load_dword v0, v0, s[46:47]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: global_load_dword v0, v0, s[62:63]
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
@@ -868,7 +868,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v41, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v42, 10, v42
-; CHECK-NEXT: s_mov_b32 s44, 0
+; CHECK-NEXT: s_mov_b32 s52, 0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: ds_write_b8 v46, v43 offset:15364
; CHECK-NEXT: v_add_nc_u32_e32 v45, -1, v41
@@ -878,12 +878,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: ; Child Loop BB1_8 Depth 2
; CHECK-NEXT: v_add_nc_u32_e32 v0, s4, v44
; CHECK-NEXT: s_lshl_b32 s5, s4, 5
-; CHECK-NEXT: s_add_i32 s45, s4, 1
+; CHECK-NEXT: s_add_i32 s53, s4, 1
; CHECK-NEXT: s_add_i32 s6, s4, 5
-; CHECK-NEXT: v_or3_b32 v47, s5, v42, s45
+; CHECK-NEXT: v_or3_b32 v47, s5, v42, s53
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_read_u8 v46, v0
-; CHECK-NEXT: v_mov_b32_e32 v56, s45
+; CHECK-NEXT: v_mov_b32_e32 v56, s53
; CHECK-NEXT: s_mov_b32 s5, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 s6, v41
; CHECK-NEXT: s_cbranch_execz .LBB1_5
@@ -912,23 +912,23 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: .LBB1_5: ; %Flow4
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; CHECK-NEXT: s_mov_b32 s46, exec_lo
+; CHECK-NEXT: s_mov_b32 s62, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 v56, v41
; CHECK-NEXT: s_cbranch_execz .LBB1_11
; CHECK-NEXT: ; %bb.6: ; %.103.preheader
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: s_mov_b32 s47, 0
+; CHECK-NEXT: s_mov_b32 s63, 0
; CHECK-NEXT: s_inst_prefetch 0x1
; CHECK-NEXT: s_branch .LBB1_8
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB1_7: ; %.114
; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s48
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64
; CHECK-NEXT: v_add_nc_u32_e32 v56, 1, v56
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v56, v41
-; CHECK-NEXT: s_or_b32 s47, vcc_lo, s47
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s47
+; CHECK-NEXT: s_or_b32 s63, vcc_lo, s63
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s63
; CHECK-NEXT: s_cbranch_execz .LBB1_10
; CHECK-NEXT: .LBB1_8: ; %.103
; CHECK-NEXT: ; Parent Loop BB1_1 Depth=1
@@ -937,22 +937,22 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: ds_read_u8 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v46, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s48, s4
+; CHECK-NEXT: s_and_saveexec_b32 s64, s4
; CHECK-NEXT: s_cbranch_execz .LBB1_7
; CHECK-NEXT: ; %bb.9: ; %.110
; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
-; CHECK-NEXT: s_add_u32 s8, s38, 40
-; CHECK-NEXT: s_addc_u32 s9, s39, 0
+; CHECK-NEXT: s_add_u32 s8, s46, 40
+; CHECK-NEXT: s_addc_u32 s9, s47, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -962,34 +962,34 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: .LBB1_10: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: s_inst_prefetch 0x2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s47
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63
; CHECK-NEXT: .LBB1_11: ; %Flow2
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s46
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s62
; CHECK-NEXT: ; %bb.12: ; %.32
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s45, v45
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s53, v45
; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v43
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_and_b32 s4, exec_lo, s4
-; CHECK-NEXT: s_or_b32 s44, s4, s44
-; CHECK-NEXT: s_mov_b32 s4, s45
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44
+; CHECK-NEXT: s_or_b32 s52, s4, s52
+; CHECK-NEXT: s_mov_b32 s4, s53
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s52
; CHECK-NEXT: s_cbranch_execnz .LBB1_1
; CHECK-NEXT: ; %bb.13: ; %.119
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s44
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 1
-; CHECK-NEXT: s_add_u32 s8, s38, 40
-; CHECK-NEXT: s_addc_u32 s9, s39, 0
+; CHECK-NEXT: s_add_u32 s8, s46, 40
+; CHECK-NEXT: s_addc_u32 s9, s47, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
-; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s43
-; CHECK-NEXT: s_mov_b32 s13, s42
+; CHECK-NEXT: s_mov_b32 s12, s51
+; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
index b4682dfb8a26d..4ca00f2daf97a 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
@@ -12,13 +12,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
-; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_1-NEXT: ;;#ASMSTART
@@ -30,24 +24,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
-; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
; GFX10_3-NEXT: ;;#ASMSTART
@@ -59,23 +41,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
-; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: v_writelane_b32 v1, s59, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040
@@ -89,12 +61,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v1, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
-; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
@@ -104,13 +70,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX12-NEXT: v_writelane_b32 v1, s59, 0
; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -124,50 +84,30 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v1, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
-; GFX8-NEXT: v_writelane_b32 v1, s59, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_movk_i32 s59, 0x4040
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v1, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
@@ -175,47 +115,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
-; GFX900-NEXT: v_writelane_b32 v1, s59, 0
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v1, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
-; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
-; GFX942-NEXT: v_writelane_b32 v1, s59, 0
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v1, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
-; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
@@ -230,12 +152,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5
; GFX10_1-NEXT: s_addk_i32 s59, 0x4040
@@ -246,23 +162,11 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5
; GFX10_3-NEXT: s_addk_i32 s59, 0x4040
@@ -273,22 +177,11 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
-; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
-; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v1, s59, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
@@ -300,12 +193,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v1, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
-; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc:
@@ -315,105 +202,62 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: v_writelane_b32 v1, s59, 0
; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s32
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 s59, s0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v1, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v1, s59, 0
-; GFX8-NEXT: s_lshr_b32 s59, s32, 6
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX8-NEXT: s_addk_i32 s59, 0x4040
+; GFX8-NEXT: s_lshr_b32 s59, s32, 6
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_addk_i32 s59, 0x4040
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v1, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: v_writelane_b32 v1, s59, 0
-; GFX900-NEXT: s_lshr_b32 s59, s32, 6
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX900-NEXT: s_addk_i32 s59, 0x4040
+; GFX900-NEXT: s_lshr_b32 s59, s32, 6
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_addk_i32 s59, 0x4040
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v1, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
-; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
-; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_add_i32 s0, s32, 0x4040
-; GFX942-NEXT: v_writelane_b32 v1, s59, 0
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v1, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
-; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
@@ -428,14 +272,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s5, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
-; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
-; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: s_mov_b32 s32, s33
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
@@ -443,19 +281,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_1-NEXT: s_mov_b32 s33, s5
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
-; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_mov_b32 s33, s5
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -463,13 +294,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s5, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
-; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
-; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: s_mov_b32 s32, s33
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
@@ -477,18 +303,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
+; GFX10_3-NEXT: s_mov_b32 s33, s5
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
-; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_mov_b32 s33, s5
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -496,13 +316,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
-; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_addk_i32 s32, 0x4080
; GFX11-NEXT: s_add_i32 s0, s33, 64
-; GFX11-NEXT: v_writelane_b32 v1, s59, 0
+; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040
@@ -511,18 +327,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
-; GFX11-NEXT: s_mov_b32 s32, s33
+; GFX11-NEXT: s_mov_b32 s33, s1
; GFX11-NEXT: s_mov_b32 s59, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v1, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
-; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_mov_b32 s33, s1
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -534,13 +343,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s1, s33
; GFX12-NEXT: s_mov_b32 s33, s32
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX12-NEXT: v_writelane_b32 v1, s59, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000
; GFX12-NEXT: v_mov_b32_e32 v0, s33
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -554,14 +359,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v1, 0
; GFX12-NEXT: s_mov_b32 s32, s33
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_mov_b32 s33, s1
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
@@ -570,33 +369,22 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s6, s33
; GFX8-NEXT: s_mov_b32 s33, s32
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
-; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
-; GFX8-NEXT: v_writelane_b32 v1, s59, 0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: s_movk_i32 s59, 0x4040
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX8-NEXT: v_readfirstlane_b32 s59, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v1, 0
; GFX8-NEXT: s_mov_b32 s32, s33
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
-; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -604,32 +392,21 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s6, s33
; GFX900-NEXT: s_mov_b32 s33, s32
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
-; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
-; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX900-NEXT: v_writelane_b32 v1, s59, 0
-; GFX900-NEXT: v_readfirstlane_b32 s59, v0
+; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v1, 0
; GFX900-NEXT: s_mov_b32 s32, s33
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
-; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp:
@@ -637,10 +414,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, s33
; GFX942-NEXT: s_mov_b32 s33, s32
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
-; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_addk_i32 s32, 0x4080
; GFX942-NEXT: s_add_i32 s0, s33, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
@@ -648,22 +421,15 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
-; GFX942-NEXT: v_writelane_b32 v1, s59, 0
-; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v1, 0
; GFX942-NEXT: s_mov_b32 s32, s33
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
-; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
@@ -676,75 +442,39 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32
-; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0
+; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
-; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1
+; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32
-; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0
+; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
-; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1
+; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
-; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX11-NEXT: v_writelane_b32 v0, s59, 0
; GFX11-NEXT: s_addc_u32 s0, s32, 64
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s59, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
-; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
@@ -754,94 +484,50 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: v_writelane_b32 v0, s59, 0
-; GFX12-NEXT: s_mov_b32 s59, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-NEXT: s_mov_b32 s59, s32
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_readlane_b32 s59, v0, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v0, s59, 0
-; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32
+; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_mov_b32 s59, 64
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1
-; GFX8-NEXT: v_readfirstlane_b32 s59, v1
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v0, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32
-; GFX900-NEXT: v_add_u32_e32 v1, 64, v1
-; GFX900-NEXT: v_writelane_b32 v0, s59, 0
-; GFX900-NEXT: v_readfirstlane_b32 s59, v1
+; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v0, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
-; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s32, 64
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
-; GFX942-NEXT: v_writelane_b32 v0, s59, 0
; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v0, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
-; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0)
@@ -852,67 +538,32 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0
; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5
; GFX10_1-NEXT: s_add_i32 s59, s59, 64
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0
; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5
; GFX10_3-NEXT: s_add_i32 s59, s59, 64
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
-; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
-; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v0, s59, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_mov_b32 s59, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
-; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
@@ -922,85 +573,41 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: v_writelane_b32 v0, s59, 0
; GFX12-NEXT: s_mov_b32 s59, s32
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_readlane_b32 s59, v0, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v0, s59, 0
; GFX8-NEXT: s_lshr_b32 s59, s32, 6
; GFX8-NEXT: s_add_i32 s59, s59, 64
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v0, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: v_writelane_b32 v0, s59, 0
; GFX900-NEXT: s_lshr_b32 s59, s32, 6
; GFX900-NEXT: s_add_i32 s59, s59, 64
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v0, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
-; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
-; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_add_i32 s0, s32, 64
-; GFX942-NEXT: v_writelane_b32 v0, s59, 0
; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v0, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
-; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0)
@@ -1013,29 +620,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s5, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33
-; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0
-; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800
+; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: s_mov_b32 s32, s33
-; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1
-; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1
+; GFX10_1-NEXT: s_mov_b32 s33, s5
+; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_mov_b32 s33, s5
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -1043,27 +637,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s5, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33
-; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0
-; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800
+; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: s_mov_b32 s32, s33
-; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1
-; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1
+; GFX10_3-NEXT: s_mov_b32 s33, s5
+; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_mov_b32 s33, s5
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -1071,29 +654,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s1, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
-; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_addk_i32 s32, 0x4080
+; GFX11-NEXT: s_addk_i32 s32, 0x4040
; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
-; GFX11-NEXT: v_writelane_b32 v0, s59, 0
; GFX11-NEXT: s_addc_u32 s0, s33, 64
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: s_bitcmp1_b32 s0, 0
; GFX11-NEXT: s_bitset0_b32 s0, 0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_mov_b32 s33, s1
; GFX11-NEXT: s_mov_b32 s59, s0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
-; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_mov_b32 s33, s1
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -1105,25 +676,15 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s1, s33
; GFX12-NEXT: s_mov_b32 s33, s32
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: v_writelane_b32 v0, s59, 0
; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
-; GFX12-NEXT: s_mov_b32 s59, s33
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_mov_b32 s59, s33
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v0, 0
; GFX12-NEXT: s_mov_b32 s32, s33
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_mov_b32 s33, s1
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
@@ -1132,28 +693,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s6, s33
; GFX8-NEXT: s_mov_b32 s33, s32
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s7, s33, 0x101000
-; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v0, s59, 0
-; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33
+; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX8-NEXT: s_mov_b32 s59, 64
-; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1
-; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX8-NEXT: v_readfirstlane_b32 s59, v1
+; GFX8-NEXT: s_add_i32 s32, s32, 0x101000
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v0, 0
; GFX8-NEXT: s_mov_b32 s32, s33
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s7, s33, 0x101000
-; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -1161,27 +711,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s6, s33
; GFX900-NEXT: s_mov_b32 s33, s32
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s7, s33, 0x101000
-; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33
-; GFX900-NEXT: v_add_u32_e32 v1, 64, v1
-; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX900-NEXT: v_writelane_b32 v0, s59, 0
-; GFX900-NEXT: v_readfirstlane_b32 s59, v1
+; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33
+; GFX900-NEXT: s_add_i32 s32, s32, 0x101000
+; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v0, 0
; GFX900-NEXT: s_mov_b32 s32, s33
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s7, s33, 0x101000
-; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp:
@@ -1189,28 +728,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s2, s33
; GFX942-NEXT: s_mov_b32 s33, s32
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s3, s33, 0x4040
-; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_addk_i32 s32, 0x4080
+; GFX942-NEXT: s_addk_i32 s32, 0x4040
; GFX942-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-NEXT: s_addc_u32 s0, s33, 64
; GFX942-NEXT: s_bitcmp1_b32 s0, 0
; GFX942-NEXT: s_bitset0_b32 s0, 0
-; GFX942-NEXT: v_writelane_b32 v0, s59, 0
; GFX942-NEXT: s_mov_b32 s59, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v0, 0
; GFX942-NEXT: s_mov_b32 s32, s33
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s3, s33, 0x4040
-; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0)
@@ -1223,27 +751,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_1-NEXT: s_mov_b32 s4, s33
; GFX10_1-NEXT: s_mov_b32 s33, s32
-; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s5
-; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0
-; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800
; GFX10_1-NEXT: s_lshr_b32 s59, s33, 5
; GFX10_1-NEXT: s_mov_b32 s32, s33
; GFX10_1-NEXT: s_add_i32 s59, s59, 64
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s5
; GFX10_1-NEXT: s_mov_b32 s33, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -1251,25 +766,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_3-NEXT: s_mov_b32 s4, s33
; GFX10_3-NEXT: s_mov_b32 s33, s32
-; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s5
-; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0
-; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000
+; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800
; GFX10_3-NEXT: s_lshr_b32 s59, s33, 5
; GFX10_3-NEXT: s_mov_b32 s32, s33
; GFX10_3-NEXT: s_add_i32 s59, s59, 64
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
-; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s5
; GFX10_3-NEXT: s_mov_b32 s33, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -1277,25 +781,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
-; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
-; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
-; GFX11-NEXT: v_writelane_b32 v0, s59, 0
-; GFX11-NEXT: s_addk_i32 s32, 0x4080
+; GFX11-NEXT: s_addk_i32 s32, 0x4040
; GFX11-NEXT: s_add_i32 s1, s33, 64
; GFX11-NEXT: s_mov_b32 s32, s33
; GFX11-NEXT: s_mov_b32 s59, s1
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v0, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
-; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -1307,24 +800,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s0, s33
; GFX12-NEXT: s_mov_b32 s33, s32
-; GFX12-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s1
-; GFX12-NEXT: v_writelane_b32 v0, s59, 0
; GFX12-NEXT: s_addk_co_i32 s32, 0x4040
+; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 s59, s33
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_mov_b32 s32, s33
-; GFX12-NEXT: v_readlane_b32 s59, v0, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s1
; GFX12-NEXT: s_mov_b32 s33, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
@@ -1333,25 +816,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, s33
; GFX8-NEXT: s_mov_b32 s33, s32
-; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX8-NEXT: s_add_i32 s5, s33, 0x101000
-; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[6:7]
-; GFX8-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX8-NEXT: v_writelane_b32 v0, s59, 0
+; GFX8-NEXT: s_add_i32 s32, s32, 0x101000
; GFX8-NEXT: s_lshr_b32 s59, s33, 6
; GFX8-NEXT: s_add_i32 s59, s59, 64
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v0, 0
; GFX8-NEXT: s_mov_b32 s32, s33
-; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX8-NEXT: s_add_i32 s5, s33, 0x101000
-; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[6:7]
; GFX8-NEXT: s_mov_b32 s33, s4
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -1359,25 +831,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, s33
; GFX900-NEXT: s_mov_b32 s33, s32
-; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX900-NEXT: s_add_i32 s5, s33, 0x101000
-; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[6:7]
-; GFX900-NEXT: s_add_i32 s32, s32, 0x102000
-; GFX900-NEXT: v_writelane_b32 v0, s59, 0
+; GFX900-NEXT: s_add_i32 s32, s32, 0x101000
; GFX900-NEXT: s_lshr_b32 s59, s33, 6
; GFX900-NEXT: s_add_i32 s59, s59, 64
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v0, 0
; GFX900-NEXT: s_mov_b32 s32, s33
-; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX900-NEXT: s_add_i32 s5, s33, 0x101000
-; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[6:7]
; GFX900-NEXT: s_mov_b32 s33, s4
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp:
@@ -1385,25 +846,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: s_mov_b32 s0, s33
; GFX942-NEXT: s_mov_b32 s33, s32
-; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1
-; GFX942-NEXT: s_add_i32 s1, s33, 0x4040
-; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[2:3]
-; GFX942-NEXT: s_addk_i32 s32, 0x4080
+; GFX942-NEXT: s_addk_i32 s32, 0x4040
; GFX942-NEXT: s_add_i32 s1, s33, 64
-; GFX942-NEXT: v_writelane_b32 v0, s59, 0
; GFX942-NEXT: s_mov_b32 s59, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v0, 0
; GFX942-NEXT: s_mov_b32 s32, s33
-; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1
-; GFX942-NEXT: s_add_i32 s1, s33, 0x4040
-; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[2:3]
; GFX942-NEXT: s_mov_b32 s33, s0
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0)
@@ -1414,12 +864,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
; GFX10_1-NEXT: s_add_i32 s59, s4, 0x442c
@@ -1431,23 +875,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
; GFX10_3-NEXT: s_add_i32 s59, s4, 0x442c
@@ -1459,22 +891,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
-; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: v_writelane_b32 v1, s59, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
; GFX11-NEXT: s_add_i32 s59, s32, 0x442c
; GFX11-NEXT: v_mov_b32_e32 v0, s0
@@ -1485,12 +906,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v1, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
-; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
@@ -1500,11 +915,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: v_writelane_b32 v1, s59, 0
; GFX12-NEXT: s_add_co_i32 s59, s32, 0x43ec
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
@@ -1514,23 +924,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v1, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_lshr_b32 s4, s32, 6
-; GFX8-NEXT: v_writelane_b32 v1, s59, 0
; GFX8-NEXT: s_add_i32 s59, s4, 0x442c
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
@@ -1541,23 +941,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v1, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_lshr_b32 s4, s32, 6
-; GFX900-NEXT: v_writelane_b32 v1, s59, 0
; GFX900-NEXT: s_add_i32 s59, s4, 0x442c
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
@@ -1568,22 +957,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v1, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
-; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: v_writelane_b32 v1, s59, 0
; GFX942-NEXT: s_add_i32 s59, s32, 0x442c
; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_mov_b32_e32 v0, s0
@@ -1594,12 +972,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v1, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
-; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
@@ -1613,12 +985,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX10_1: ; %bb.0:
; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2
; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5
@@ -1632,23 +998,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_1-NEXT: s_waitcnt vmcnt(0)
; GFX10_1-NEXT: s_setpc_b64 s[30:31]
;
; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX10_3: ; %bb.0:
; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2
; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5
@@ -1662,23 +1016,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0
-; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
-; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
-; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
-; GFX10_3-NEXT: s_waitcnt vmcnt(0)
; GFX10_3-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX11-NEXT: s_add_i32 s2, s32, 0x8040
-; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill
-; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s1, s32, 64
-; GFX11-NEXT: v_writelane_b32 v1, s59, 0
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: v_mov_b32_e32 v0, s1
; GFX11-NEXT: s_add_i32 s59, s32, s0
@@ -1690,12 +1033,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v1, 0
-; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
-; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
-; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
@@ -1705,11 +1042,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s1
-; GFX12-NEXT: v_writelane_b32 v1, s59, 0
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
; GFX12-NEXT: v_mov_b32_e32 v0, s32
; GFX12-NEXT: s_wait_alu 0xfffe
@@ -1723,22 +1055,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v1, 0
-; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 exec_lo, s0
-; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: v_writelane_b32 v1, s59, 0
; GFX8-NEXT: s_lshl_b32 s4, s16, 2
; GFX8-NEXT: s_lshr_b32 s59, s32, 6
; GFX8-NEXT: s_add_i32 s59, s59, s4
@@ -1752,22 +1074,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v1, 0
-; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[4:5]
-; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: v_writelane_b32 v1, s59, 0
; GFX900-NEXT: s_lshl_b32 s4, s16, 2
; GFX900-NEXT: s_lshr_b32 s59, s32, 6
; GFX900-NEXT: s_add_i32 s59, s59, s4
@@ -1781,23 +1092,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v1, 0
-; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
-; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX900-NEXT: s_mov_b64 exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1
-; GFX942-NEXT: s_add_i32 s1, s32, 0x8040
-; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[2:3]
; GFX942-NEXT: s_lshl_b32 s0, s0, 2
-; GFX942-NEXT: v_writelane_b32 v1, s59, 0
; GFX942-NEXT: s_add_i32 s59, s32, s0
; GFX942-NEXT: s_addk_i32 s59, 0x4040
; GFX942-NEXT: s_add_i32 s0, s32, 64
@@ -1809,12 +1109,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s59, v1, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
-; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 2420393b63ba9..23b7369a11dd3 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -44,30 +44,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX7-NEXT: v_writelane_b32 v23, s35, 4
; GFX7-NEXT: v_writelane_b32 v23, s36, 5
; GFX7-NEXT: v_writelane_b32 v23, s37, 6
-; GFX7-NEXT: v_writelane_b32 v23, s38, 7
-; GFX7-NEXT: v_writelane_b32 v23, s39, 8
-; GFX7-NEXT: v_writelane_b32 v23, s40, 9
-; GFX7-NEXT: v_writelane_b32 v23, s41, 10
-; GFX7-NEXT: v_writelane_b32 v23, s42, 11
-; GFX7-NEXT: v_writelane_b32 v23, s43, 12
-; GFX7-NEXT: v_writelane_b32 v23, s44, 13
-; GFX7-NEXT: v_writelane_b32 v23, s45, 14
-; GFX7-NEXT: v_writelane_b32 v23, s46, 15
-; GFX7-NEXT: v_writelane_b32 v23, s47, 16
-; GFX7-NEXT: v_writelane_b32 v23, s48, 17
-; GFX7-NEXT: v_writelane_b32 v23, s49, 18
-; GFX7-NEXT: v_writelane_b32 v23, s50, 19
-; GFX7-NEXT: v_writelane_b32 v23, s51, 20
-; GFX7-NEXT: v_writelane_b32 v23, s52, 21
-; GFX7-NEXT: v_writelane_b32 v23, s53, 22
-; GFX7-NEXT: v_writelane_b32 v23, s54, 23
-; GFX7-NEXT: v_writelane_b32 v23, s55, 24
-; GFX7-NEXT: v_writelane_b32 v23, s56, 25
+; GFX7-NEXT: v_writelane_b32 v23, s46, 7
+; GFX7-NEXT: v_writelane_b32 v23, s47, 8
+; GFX7-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-NEXT: v_writelane_b32 v23, s51, 12
; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
-; GFX7-NEXT: v_writelane_b32 v23, s57, 26
+; GFX7-NEXT: v_writelane_b32 v23, s52, 13
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX7-NEXT: v_writelane_b32 v23, s58, 27
+; GFX7-NEXT: v_writelane_b32 v23, s53, 14
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use alloca0 v0
; GFX7-NEXT: ;;#ASMEND
@@ -78,35 +65,20 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0
-; GFX7-NEXT: v_writelane_b32 v23, s59, 28
; GFX7-NEXT: v_readfirstlane_b32 s59, v0
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
-; GFX7-NEXT: v_readlane_b32 s59, v23, 28
-; GFX7-NEXT: v_readlane_b32 s58, v23, 27
-; GFX7-NEXT: v_readlane_b32 s57, v23, 26
-; GFX7-NEXT: v_readlane_b32 s56, v23, 25
-; GFX7-NEXT: v_readlane_b32 s55, v23, 24
-; GFX7-NEXT: v_readlane_b32 s54, v23, 23
-; GFX7-NEXT: v_readlane_b32 s53, v23, 22
-; GFX7-NEXT: v_readlane_b32 s52, v23, 21
-; GFX7-NEXT: v_readlane_b32 s51, v23, 20
-; GFX7-NEXT: v_readlane_b32 s50, v23, 19
-; GFX7-NEXT: v_readlane_b32 s49, v23, 18
-; GFX7-NEXT: v_readlane_b32 s48, v23, 17
-; GFX7-NEXT: v_readlane_b32 s47, v23, 16
-; GFX7-NEXT: v_readlane_b32 s46, v23, 15
-; GFX7-NEXT: v_readlane_b32 s45, v23, 14
-; GFX7-NEXT: v_readlane_b32 s44, v23, 13
-; GFX7-NEXT: v_readlane_b32 s43, v23, 12
-; GFX7-NEXT: v_readlane_b32 s42, v23, 11
-; GFX7-NEXT: v_readlane_b32 s41, v23, 10
-; GFX7-NEXT: v_readlane_b32 s40, v23, 9
-; GFX7-NEXT: v_readlane_b32 s39, v23, 8
-; GFX7-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-NEXT: v_readlane_b32 s47, v23, 8
+; GFX7-NEXT: v_readlane_b32 s46, v23, 7
; GFX7-NEXT: v_readlane_b32 s37, v23, 6
; GFX7-NEXT: v_readlane_b32 s36, v23, 5
; GFX7-NEXT: v_readlane_b32 s35, v23, 4
@@ -135,30 +107,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX8-NEXT: v_writelane_b32 v23, s35, 4
; GFX8-NEXT: v_writelane_b32 v23, s36, 5
; GFX8-NEXT: v_writelane_b32 v23, s37, 6
-; GFX8-NEXT: v_writelane_b32 v23, s38, 7
-; GFX8-NEXT: v_writelane_b32 v23, s39, 8
-; GFX8-NEXT: v_writelane_b32 v23, s40, 9
-; GFX8-NEXT: v_writelane_b32 v23, s41, 10
-; GFX8-NEXT: v_writelane_b32 v23, s42, 11
-; GFX8-NEXT: v_writelane_b32 v23, s43, 12
-; GFX8-NEXT: v_writelane_b32 v23, s44, 13
-; GFX8-NEXT: v_writelane_b32 v23, s45, 14
-; GFX8-NEXT: v_writelane_b32 v23, s46, 15
-; GFX8-NEXT: v_writelane_b32 v23, s47, 16
-; GFX8-NEXT: v_writelane_b32 v23, s48, 17
-; GFX8-NEXT: v_writelane_b32 v23, s49, 18
-; GFX8-NEXT: v_writelane_b32 v23, s50, 19
-; GFX8-NEXT: v_writelane_b32 v23, s51, 20
-; GFX8-NEXT: v_writelane_b32 v23, s52, 21
-; GFX8-NEXT: v_writelane_b32 v23, s53, 22
-; GFX8-NEXT: v_writelane_b32 v23, s54, 23
-; GFX8-NEXT: v_writelane_b32 v23, s55, 24
-; GFX8-NEXT: v_writelane_b32 v23, s56, 25
+; GFX8-NEXT: v_writelane_b32 v23, s46, 7
+; GFX8-NEXT: v_writelane_b32 v23, s47, 8
+; GFX8-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-NEXT: v_writelane_b32 v23, s51, 12
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX8-NEXT: v_writelane_b32 v23, s57, 26
+; GFX8-NEXT: v_writelane_b32 v23, s52, 13
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_writelane_b32 v23, s58, 27
+; GFX8-NEXT: v_writelane_b32 v23, s53, 14
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
@@ -169,35 +128,20 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0
-; GFX8-NEXT: v_writelane_b32 v23, s59, 28
; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v23, 28
-; GFX8-NEXT: v_readlane_b32 s58, v23, 27
-; GFX8-NEXT: v_readlane_b32 s57, v23, 26
-; GFX8-NEXT: v_readlane_b32 s56, v23, 25
-; GFX8-NEXT: v_readlane_b32 s55, v23, 24
-; GFX8-NEXT: v_readlane_b32 s54, v23, 23
-; GFX8-NEXT: v_readlane_b32 s53, v23, 22
-; GFX8-NEXT: v_readlane_b32 s52, v23, 21
-; GFX8-NEXT: v_readlane_b32 s51, v23, 20
-; GFX8-NEXT: v_readlane_b32 s50, v23, 19
-; GFX8-NEXT: v_readlane_b32 s49, v23, 18
-; GFX8-NEXT: v_readlane_b32 s48, v23, 17
-; GFX8-NEXT: v_readlane_b32 s47, v23, 16
-; GFX8-NEXT: v_readlane_b32 s46, v23, 15
-; GFX8-NEXT: v_readlane_b32 s45, v23, 14
-; GFX8-NEXT: v_readlane_b32 s44, v23, 13
-; GFX8-NEXT: v_readlane_b32 s43, v23, 12
-; GFX8-NEXT: v_readlane_b32 s42, v23, 11
-; GFX8-NEXT: v_readlane_b32 s41, v23, 10
-; GFX8-NEXT: v_readlane_b32 s40, v23, 9
-; GFX8-NEXT: v_readlane_b32 s39, v23, 8
-; GFX8-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-NEXT: v_readlane_b32 s47, v23, 8
+; GFX8-NEXT: v_readlane_b32 s46, v23, 7
; GFX8-NEXT: v_readlane_b32 s37, v23, 6
; GFX8-NEXT: v_readlane_b32 s36, v23, 5
; GFX8-NEXT: v_readlane_b32 s35, v23, 4
@@ -226,30 +170,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX900-NEXT: v_writelane_b32 v23, s35, 4
; GFX900-NEXT: v_writelane_b32 v23, s36, 5
; GFX900-NEXT: v_writelane_b32 v23, s37, 6
-; GFX900-NEXT: v_writelane_b32 v23, s38, 7
-; GFX900-NEXT: v_writelane_b32 v23, s39, 8
-; GFX900-NEXT: v_writelane_b32 v23, s40, 9
-; GFX900-NEXT: v_writelane_b32 v23, s41, 10
-; GFX900-NEXT: v_writelane_b32 v23, s42, 11
-; GFX900-NEXT: v_writelane_b32 v23, s43, 12
-; GFX900-NEXT: v_writelane_b32 v23, s44, 13
-; GFX900-NEXT: v_writelane_b32 v23, s45, 14
-; GFX900-NEXT: v_writelane_b32 v23, s46, 15
-; GFX900-NEXT: v_writelane_b32 v23, s47, 16
-; GFX900-NEXT: v_writelane_b32 v23, s48, 17
-; GFX900-NEXT: v_writelane_b32 v23, s49, 18
-; GFX900-NEXT: v_writelane_b32 v23, s50, 19
-; GFX900-NEXT: v_writelane_b32 v23, s51, 20
-; GFX900-NEXT: v_writelane_b32 v23, s52, 21
-; GFX900-NEXT: v_writelane_b32 v23, s53, 22
-; GFX900-NEXT: v_writelane_b32 v23, s54, 23
-; GFX900-NEXT: v_writelane_b32 v23, s55, 24
-; GFX900-NEXT: v_writelane_b32 v23, s56, 25
+; GFX900-NEXT: v_writelane_b32 v23, s46, 7
+; GFX900-NEXT: v_writelane_b32 v23, s47, 8
+; GFX900-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-NEXT: v_writelane_b32 v23, s51, 12
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
-; GFX900-NEXT: v_writelane_b32 v23, s57, 26
+; GFX900-NEXT: v_writelane_b32 v23, s52, 13
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_writelane_b32 v23, s58, 27
+; GFX900-NEXT: v_writelane_b32 v23, s53, 14
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
@@ -259,35 +190,20 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0
-; GFX900-NEXT: v_writelane_b32 v23, s59, 28
; GFX900-NEXT: v_readfirstlane_b32 s59, v0
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v23, 28
-; GFX900-NEXT: v_readlane_b32 s58, v23, 27
-; GFX900-NEXT: v_readlane_b32 s57, v23, 26
-; GFX900-NEXT: v_readlane_b32 s56, v23, 25
-; GFX900-NEXT: v_readlane_b32 s55, v23, 24
-; GFX900-NEXT: v_readlane_b32 s54, v23, 23
-; GFX900-NEXT: v_readlane_b32 s53, v23, 22
-; GFX900-NEXT: v_readlane_b32 s52, v23, 21
-; GFX900-NEXT: v_readlane_b32 s51, v23, 20
-; GFX900-NEXT: v_readlane_b32 s50, v23, 19
-; GFX900-NEXT: v_readlane_b32 s49, v23, 18
-; GFX900-NEXT: v_readlane_b32 s48, v23, 17
-; GFX900-NEXT: v_readlane_b32 s47, v23, 16
-; GFX900-NEXT: v_readlane_b32 s46, v23, 15
-; GFX900-NEXT: v_readlane_b32 s45, v23, 14
-; GFX900-NEXT: v_readlane_b32 s44, v23, 13
-; GFX900-NEXT: v_readlane_b32 s43, v23, 12
-; GFX900-NEXT: v_readlane_b32 s42, v23, 11
-; GFX900-NEXT: v_readlane_b32 s41, v23, 10
-; GFX900-NEXT: v_readlane_b32 s40, v23, 9
-; GFX900-NEXT: v_readlane_b32 s39, v23, 8
-; GFX900-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-NEXT: v_readlane_b32 s47, v23, 8
+; GFX900-NEXT: v_readlane_b32 s46, v23, 7
; GFX900-NEXT: v_readlane_b32 s37, v23, 6
; GFX900-NEXT: v_readlane_b32 s36, v23, 5
; GFX900-NEXT: v_readlane_b32 s35, v23, 4
@@ -316,33 +232,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-NEXT: v_writelane_b32 v23, s35, 4
; GFX942-NEXT: v_writelane_b32 v23, s36, 5
; GFX942-NEXT: v_writelane_b32 v23, s37, 6
-; GFX942-NEXT: v_writelane_b32 v23, s38, 7
-; GFX942-NEXT: v_writelane_b32 v23, s39, 8
-; GFX942-NEXT: v_writelane_b32 v23, s40, 9
-; GFX942-NEXT: v_writelane_b32 v23, s41, 10
-; GFX942-NEXT: v_writelane_b32 v23, s42, 11
-; GFX942-NEXT: v_writelane_b32 v23, s43, 12
-; GFX942-NEXT: v_writelane_b32 v23, s44, 13
-; GFX942-NEXT: v_writelane_b32 v23, s45, 14
-; GFX942-NEXT: v_writelane_b32 v23, s46, 15
-; GFX942-NEXT: v_writelane_b32 v23, s47, 16
-; GFX942-NEXT: v_writelane_b32 v23, s48, 17
-; GFX942-NEXT: v_writelane_b32 v23, s49, 18
-; GFX942-NEXT: v_writelane_b32 v23, s50, 19
-; GFX942-NEXT: v_writelane_b32 v23, s51, 20
-; GFX942-NEXT: v_writelane_b32 v23, s52, 21
-; GFX942-NEXT: v_writelane_b32 v23, s53, 22
-; GFX942-NEXT: v_writelane_b32 v23, s54, 23
-; GFX942-NEXT: v_writelane_b32 v23, s55, 24
-; GFX942-NEXT: v_writelane_b32 v23, s56, 25
-; GFX942-NEXT: v_writelane_b32 v23, s57, 26
-; GFX942-NEXT: v_writelane_b32 v23, s58, 27
-; GFX942-NEXT: v_writelane_b32 v23, s59, 28
-; GFX942-NEXT: v_writelane_b32 v23, s60, 29
+; GFX942-NEXT: v_writelane_b32 v23, s46, 7
+; GFX942-NEXT: v_writelane_b32 v23, s47, 8
+; GFX942-NEXT: v_writelane_b32 v23, s48, 9
+; GFX942-NEXT: v_writelane_b32 v23, s49, 10
+; GFX942-NEXT: v_writelane_b32 v23, s50, 11
+; GFX942-NEXT: v_writelane_b32 v23, s51, 12
; GFX942-NEXT: s_add_i32 s0, s32, 64
-; GFX942-NEXT: v_writelane_b32 v23, s61, 30
+; GFX942-NEXT: v_writelane_b32 v23, s52, 13
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
+; GFX942-NEXT: v_writelane_b32 v23, s53, 14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
@@ -356,30 +256,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s61, v23, 30
-; GFX942-NEXT: v_readlane_b32 s60, v23, 29
-; GFX942-NEXT: v_readlane_b32 s59, v23, 28
-; GFX942-NEXT: v_readlane_b32 s58, v23, 27
-; GFX942-NEXT: v_readlane_b32 s57, v23, 26
-; GFX942-NEXT: v_readlane_b32 s56, v23, 25
-; GFX942-NEXT: v_readlane_b32 s55, v23, 24
-; GFX942-NEXT: v_readlane_b32 s54, v23, 23
-; GFX942-NEXT: v_readlane_b32 s53, v23, 22
-; GFX942-NEXT: v_readlane_b32 s52, v23, 21
-; GFX942-NEXT: v_readlane_b32 s51, v23, 20
-; GFX942-NEXT: v_readlane_b32 s50, v23, 19
-; GFX942-NEXT: v_readlane_b32 s49, v23, 18
-; GFX942-NEXT: v_readlane_b32 s48, v23, 17
-; GFX942-NEXT: v_readlane_b32 s47, v23, 16
-; GFX942-NEXT: v_readlane_b32 s46, v23, 15
-; GFX942-NEXT: v_readlane_b32 s45, v23, 14
-; GFX942-NEXT: v_readlane_b32 s44, v23, 13
-; GFX942-NEXT: v_readlane_b32 s43, v23, 12
-; GFX942-NEXT: v_readlane_b32 s42, v23, 11
-; GFX942-NEXT: v_readlane_b32 s41, v23, 10
-; GFX942-NEXT: v_readlane_b32 s40, v23, 9
-; GFX942-NEXT: v_readlane_b32 s39, v23, 8
-; GFX942-NEXT: v_readlane_b32 s38, v23, 7
+; GFX942-NEXT: v_readlane_b32 s53, v23, 14
+; GFX942-NEXT: v_readlane_b32 s52, v23, 13
+; GFX942-NEXT: v_readlane_b32 s51, v23, 12
+; GFX942-NEXT: v_readlane_b32 s50, v23, 11
+; GFX942-NEXT: v_readlane_b32 s49, v23, 10
+; GFX942-NEXT: v_readlane_b32 s48, v23, 9
+; GFX942-NEXT: v_readlane_b32 s47, v23, 8
+; GFX942-NEXT: v_readlane_b32 s46, v23, 7
; GFX942-NEXT: v_readlane_b32 s37, v23, 6
; GFX942-NEXT: v_readlane_b32 s36, v23, 5
; GFX942-NEXT: v_readlane_b32 s35, v23, 4
@@ -415,59 +299,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4
; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5
; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6
-; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7
-; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8
-; GFX10_1-NEXT: v_writelane_b32 v23, s40, 9
-; GFX10_1-NEXT: v_writelane_b32 v23, s41, 10
-; GFX10_1-NEXT: v_writelane_b32 v23, s42, 11
-; GFX10_1-NEXT: v_writelane_b32 v23, s43, 12
-; GFX10_1-NEXT: v_writelane_b32 v23, s44, 13
-; GFX10_1-NEXT: v_writelane_b32 v23, s45, 14
-; GFX10_1-NEXT: v_writelane_b32 v23, s46, 15
-; GFX10_1-NEXT: v_writelane_b32 v23, s47, 16
-; GFX10_1-NEXT: v_writelane_b32 v23, s48, 17
-; GFX10_1-NEXT: v_writelane_b32 v23, s49, 18
-; GFX10_1-NEXT: v_writelane_b32 v23, s50, 19
-; GFX10_1-NEXT: v_writelane_b32 v23, s51, 20
-; GFX10_1-NEXT: v_writelane_b32 v23, s52, 21
-; GFX10_1-NEXT: v_writelane_b32 v23, s53, 22
-; GFX10_1-NEXT: v_writelane_b32 v23, s54, 23
-; GFX10_1-NEXT: v_writelane_b32 v23, s55, 24
-; GFX10_1-NEXT: v_writelane_b32 v23, s56, 25
-; GFX10_1-NEXT: v_writelane_b32 v23, s57, 26
-; GFX10_1-NEXT: v_writelane_b32 v23, s58, 27
+; GFX10_1-NEXT: v_writelane_b32 v23, s46, 7
+; GFX10_1-NEXT: v_writelane_b32 v23, s47, 8
+; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32
-; GFX10_1-NEXT: v_writelane_b32 v23, s59, 28
; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v23, 28
-; GFX10_1-NEXT: v_readlane_b32 s58, v23, 27
-; GFX10_1-NEXT: v_readlane_b32 s57, v23, 26
-; GFX10_1-NEXT: v_readlane_b32 s56, v23, 25
-; GFX10_1-NEXT: v_readlane_b32 s55, v23, 24
-; GFX10_1-NEXT: v_readlane_b32 s54, v23, 23
-; GFX10_1-NEXT: v_readlane_b32 s53, v23, 22
-; GFX10_1-NEXT: v_readlane_b32 s52, v23, 21
-; GFX10_1-NEXT: v_readlane_b32 s51, v23, 20
-; GFX10_1-NEXT: v_readlane_b32 s50, v23, 19
-; GFX10_1-NEXT: v_readlane_b32 s49, v23, 18
-; GFX10_1-NEXT: v_readlane_b32 s48, v23, 17
-; GFX10_1-NEXT: v_readlane_b32 s47, v23, 16
-; GFX10_1-NEXT: v_readlane_b32 s46, v23, 15
-; GFX10_1-NEXT: v_readlane_b32 s45, v23, 14
-; GFX10_1-NEXT: v_readlane_b32 s44, v23, 13
-; GFX10_1-NEXT: v_readlane_b32 s43, v23, 12
-; GFX10_1-NEXT: v_readlane_b32 s42, v23, 11
-; GFX10_1-NEXT: v_readlane_b32 s41, v23, 10
-; GFX10_1-NEXT: v_readlane_b32 s40, v23, 9
-; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8
-; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_1-NEXT: v_readlane_b32 s47, v23, 8
+; GFX10_1-NEXT: v_readlane_b32 s46, v23, 7
; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6
; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5
; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4
@@ -503,59 +359,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4
; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5
; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6
-; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7
-; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8
-; GFX10_3-NEXT: v_writelane_b32 v23, s40, 9
-; GFX10_3-NEXT: v_writelane_b32 v23, s41, 10
-; GFX10_3-NEXT: v_writelane_b32 v23, s42, 11
-; GFX10_3-NEXT: v_writelane_b32 v23, s43, 12
-; GFX10_3-NEXT: v_writelane_b32 v23, s44, 13
-; GFX10_3-NEXT: v_writelane_b32 v23, s45, 14
-; GFX10_3-NEXT: v_writelane_b32 v23, s46, 15
-; GFX10_3-NEXT: v_writelane_b32 v23, s47, 16
-; GFX10_3-NEXT: v_writelane_b32 v23, s48, 17
-; GFX10_3-NEXT: v_writelane_b32 v23, s49, 18
-; GFX10_3-NEXT: v_writelane_b32 v23, s50, 19
-; GFX10_3-NEXT: v_writelane_b32 v23, s51, 20
-; GFX10_3-NEXT: v_writelane_b32 v23, s52, 21
-; GFX10_3-NEXT: v_writelane_b32 v23, s53, 22
-; GFX10_3-NEXT: v_writelane_b32 v23, s54, 23
-; GFX10_3-NEXT: v_writelane_b32 v23, s55, 24
-; GFX10_3-NEXT: v_writelane_b32 v23, s56, 25
-; GFX10_3-NEXT: v_writelane_b32 v23, s57, 26
-; GFX10_3-NEXT: v_writelane_b32 v23, s58, 27
+; GFX10_3-NEXT: v_writelane_b32 v23, s46, 7
+; GFX10_3-NEXT: v_writelane_b32 v23, s47, 8
+; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32
-; GFX10_3-NEXT: v_writelane_b32 v23, s59, 28
; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v23, 28
-; GFX10_3-NEXT: v_readlane_b32 s58, v23, 27
-; GFX10_3-NEXT: v_readlane_b32 s57, v23, 26
-; GFX10_3-NEXT: v_readlane_b32 s56, v23, 25
-; GFX10_3-NEXT: v_readlane_b32 s55, v23, 24
-; GFX10_3-NEXT: v_readlane_b32 s54, v23, 23
-; GFX10_3-NEXT: v_readlane_b32 s53, v23, 22
-; GFX10_3-NEXT: v_readlane_b32 s52, v23, 21
-; GFX10_3-NEXT: v_readlane_b32 s51, v23, 20
-; GFX10_3-NEXT: v_readlane_b32 s50, v23, 19
-; GFX10_3-NEXT: v_readlane_b32 s49, v23, 18
-; GFX10_3-NEXT: v_readlane_b32 s48, v23, 17
-; GFX10_3-NEXT: v_readlane_b32 s47, v23, 16
-; GFX10_3-NEXT: v_readlane_b32 s46, v23, 15
-; GFX10_3-NEXT: v_readlane_b32 s45, v23, 14
-; GFX10_3-NEXT: v_readlane_b32 s44, v23, 13
-; GFX10_3-NEXT: v_readlane_b32 s43, v23, 12
-; GFX10_3-NEXT: v_readlane_b32 s42, v23, 11
-; GFX10_3-NEXT: v_readlane_b32 s41, v23, 10
-; GFX10_3-NEXT: v_readlane_b32 s40, v23, 9
-; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8
-; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_3-NEXT: v_readlane_b32 s47, v23, 8
+; GFX10_3-NEXT: v_readlane_b32 s46, v23, 7
; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6
; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5
; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4
@@ -591,65 +419,33 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX11-NEXT: v_writelane_b32 v23, s35, 4
; GFX11-NEXT: v_writelane_b32 v23, s36, 5
; GFX11-NEXT: v_writelane_b32 v23, s37, 6
-; GFX11-NEXT: v_writelane_b32 v23, s38, 7
-; GFX11-NEXT: v_writelane_b32 v23, s39, 8
-; GFX11-NEXT: v_writelane_b32 v23, s40, 9
-; GFX11-NEXT: v_writelane_b32 v23, s41, 10
-; GFX11-NEXT: v_writelane_b32 v23, s42, 11
-; GFX11-NEXT: v_writelane_b32 v23, s43, 12
-; GFX11-NEXT: v_writelane_b32 v23, s44, 13
-; GFX11-NEXT: v_writelane_b32 v23, s45, 14
-; GFX11-NEXT: v_writelane_b32 v23, s46, 15
-; GFX11-NEXT: v_writelane_b32 v23, s47, 16
-; GFX11-NEXT: v_writelane_b32 v23, s48, 17
-; GFX11-NEXT: v_writelane_b32 v23, s49, 18
-; GFX11-NEXT: v_writelane_b32 v23, s50, 19
-; GFX11-NEXT: v_writelane_b32 v23, s51, 20
-; GFX11-NEXT: v_writelane_b32 v23, s52, 21
-; GFX11-NEXT: v_writelane_b32 v23, s53, 22
-; GFX11-NEXT: v_writelane_b32 v23, s54, 23
-; GFX11-NEXT: v_writelane_b32 v23, s55, 24
-; GFX11-NEXT: v_writelane_b32 v23, s56, 25
-; GFX11-NEXT: v_writelane_b32 v23, s57, 26
-; GFX11-NEXT: v_writelane_b32 v23, s58, 27
+; GFX11-NEXT: v_writelane_b32 v23, s46, 7
+; GFX11-NEXT: v_writelane_b32 v23, s47, 8
+; GFX11-NEXT: v_writelane_b32 v23, s48, 9
+; GFX11-NEXT: v_writelane_b32 v23, s49, 10
+; GFX11-NEXT: v_writelane_b32 v23, s50, 11
+; GFX11-NEXT: v_writelane_b32 v23, s51, 12
+; GFX11-NEXT: v_writelane_b32 v23, s52, 13
+; GFX11-NEXT: v_writelane_b32 v23, s53, 14
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: s_addc_u32 s32, s32, 0x4040
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_bitcmp1_b32 s32, 0
-; GFX11-NEXT: v_writelane_b32 v23, s59, 28
-; GFX11-NEXT: s_bitset0_b32 s32, 0
-; GFX11-NEXT: s_mov_b32 s59, s32
-; GFX11-NEXT: s_addc_u32 s32, s32, 0xffffbfc0
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_bitcmp1_b32 s32, 0
-; GFX11-NEXT: s_bitset0_b32 s32, 0
+; GFX11-NEXT: s_addc_u32 s60, s32, 0x4040
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_bitcmp1_b32 s60, 0
+; GFX11-NEXT: s_bitset0_b32 s60, 0
+; GFX11-NEXT: s_mov_b32 s59, s60
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v23, 28
-; GFX11-NEXT: v_readlane_b32 s58, v23, 27
-; GFX11-NEXT: v_readlane_b32 s57, v23, 26
-; GFX11-NEXT: v_readlane_b32 s56, v23, 25
-; GFX11-NEXT: v_readlane_b32 s55, v23, 24
-; GFX11-NEXT: v_readlane_b32 s54, v23, 23
-; GFX11-NEXT: v_readlane_b32 s53, v23, 22
-; GFX11-NEXT: v_readlane_b32 s52, v23, 21
-; GFX11-NEXT: v_readlane_b32 s51, v23, 20
-; GFX11-NEXT: v_readlane_b32 s50, v23, 19
-; GFX11-NEXT: v_readlane_b32 s49, v23, 18
-; GFX11-NEXT: v_readlane_b32 s48, v23, 17
-; GFX11-NEXT: v_readlane_b32 s47, v23, 16
-; GFX11-NEXT: v_readlane_b32 s46, v23, 15
-; GFX11-NEXT: v_readlane_b32 s45, v23, 14
-; GFX11-NEXT: v_readlane_b32 s44, v23, 13
-; GFX11-NEXT: v_readlane_b32 s43, v23, 12
-; GFX11-NEXT: v_readlane_b32 s42, v23, 11
-; GFX11-NEXT: v_readlane_b32 s41, v23, 10
-; GFX11-NEXT: v_readlane_b32 s40, v23, 9
-; GFX11-NEXT: v_readlane_b32 s39, v23, 8
-; GFX11-NEXT: v_readlane_b32 s38, v23, 7
+; GFX11-NEXT: v_readlane_b32 s53, v23, 14
+; GFX11-NEXT: v_readlane_b32 s52, v23, 13
+; GFX11-NEXT: v_readlane_b32 s51, v23, 12
+; GFX11-NEXT: v_readlane_b32 s50, v23, 11
+; GFX11-NEXT: v_readlane_b32 s49, v23, 10
+; GFX11-NEXT: v_readlane_b32 s48, v23, 9
+; GFX11-NEXT: v_readlane_b32 s47, v23, 8
+; GFX11-NEXT: v_readlane_b32 s46, v23, 7
; GFX11-NEXT: v_readlane_b32 s37, v23, 6
; GFX11-NEXT: v_readlane_b32 s36, v23, 5
; GFX11-NEXT: v_readlane_b32 s35, v23, 4
@@ -687,66 +483,34 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: v_writelane_b32 v23, s35, 4
; GFX12-NEXT: v_writelane_b32 v23, s36, 5
; GFX12-NEXT: v_writelane_b32 v23, s37, 6
-; GFX12-NEXT: v_writelane_b32 v23, s38, 7
-; GFX12-NEXT: v_writelane_b32 v23, s39, 8
-; GFX12-NEXT: v_writelane_b32 v23, s40, 9
-; GFX12-NEXT: v_writelane_b32 v23, s41, 10
-; GFX12-NEXT: v_writelane_b32 v23, s42, 11
-; GFX12-NEXT: v_writelane_b32 v23, s43, 12
-; GFX12-NEXT: v_writelane_b32 v23, s44, 13
-; GFX12-NEXT: v_writelane_b32 v23, s45, 14
-; GFX12-NEXT: v_writelane_b32 v23, s46, 15
-; GFX12-NEXT: v_writelane_b32 v23, s47, 16
-; GFX12-NEXT: v_writelane_b32 v23, s48, 17
-; GFX12-NEXT: v_writelane_b32 v23, s49, 18
-; GFX12-NEXT: v_writelane_b32 v23, s50, 19
-; GFX12-NEXT: v_writelane_b32 v23, s51, 20
-; GFX12-NEXT: v_writelane_b32 v23, s52, 21
-; GFX12-NEXT: v_writelane_b32 v23, s53, 22
-; GFX12-NEXT: v_writelane_b32 v23, s54, 23
-; GFX12-NEXT: v_writelane_b32 v23, s55, 24
-; GFX12-NEXT: v_writelane_b32 v23, s56, 25
-; GFX12-NEXT: v_writelane_b32 v23, s57, 26
-; GFX12-NEXT: v_writelane_b32 v23, s58, 27
+; GFX12-NEXT: v_writelane_b32 v23, s46, 7
+; GFX12-NEXT: v_writelane_b32 v23, s47, 8
+; GFX12-NEXT: v_writelane_b32 v23, s48, 9
+; GFX12-NEXT: v_writelane_b32 v23, s49, 10
+; GFX12-NEXT: v_writelane_b32 v23, s50, 11
+; GFX12-NEXT: v_writelane_b32 v23, s51, 12
+; GFX12-NEXT: v_writelane_b32 v23, s52, 13
+; GFX12-NEXT: v_writelane_b32 v23, s53, 14
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0x4000
+; GFX12-NEXT: s_add_co_ci_u32 s60, s32, 0x4000
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_bitcmp1_b32 s32, 0
-; GFX12-NEXT: v_writelane_b32 v23, s59, 28
-; GFX12-NEXT: s_bitset0_b32 s32, 0
+; GFX12-NEXT: s_bitcmp1_b32 s60, 0
+; GFX12-NEXT: s_bitset0_b32 s60, 0
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_mov_b32 s59, s32
-; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0xffffc000
-; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: s_bitcmp1_b32 s32, 0
-; GFX12-NEXT: s_bitset0_b32 s32, 0
+; GFX12-NEXT: s_mov_b32 s59, s60
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v23, 28
-; GFX12-NEXT: v_readlane_b32 s58, v23, 27
-; GFX12-NEXT: v_readlane_b32 s57, v23, 26
-; GFX12-NEXT: v_readlane_b32 s56, v23, 25
-; GFX12-NEXT: v_readlane_b32 s55, v23, 24
-; GFX12-NEXT: v_readlane_b32 s54, v23, 23
-; GFX12-NEXT: v_readlane_b32 s53, v23, 22
-; GFX12-NEXT: v_readlane_b32 s52, v23, 21
-; GFX12-NEXT: v_readlane_b32 s51, v23, 20
-; GFX12-NEXT: v_readlane_b32 s50, v23, 19
-; GFX12-NEXT: v_readlane_b32 s49, v23, 18
-; GFX12-NEXT: v_readlane_b32 s48, v23, 17
-; GFX12-NEXT: v_readlane_b32 s47, v23, 16
-; GFX12-NEXT: v_readlane_b32 s46, v23, 15
-; GFX12-NEXT: v_readlane_b32 s45, v23, 14
-; GFX12-NEXT: v_readlane_b32 s44, v23, 13
-; GFX12-NEXT: v_readlane_b32 s43, v23, 12
-; GFX12-NEXT: v_readlane_b32 s42, v23, 11
-; GFX12-NEXT: v_readlane_b32 s41, v23, 10
-; GFX12-NEXT: v_readlane_b32 s40, v23, 9
-; GFX12-NEXT: v_readlane_b32 s39, v23, 8
-; GFX12-NEXT: v_readlane_b32 s38, v23, 7
+; GFX12-NEXT: v_readlane_b32 s53, v23, 14
+; GFX12-NEXT: v_readlane_b32 s52, v23, 13
+; GFX12-NEXT: v_readlane_b32 s51, v23, 12
+; GFX12-NEXT: v_readlane_b32 s50, v23, 11
+; GFX12-NEXT: v_readlane_b32 s49, v23, 10
+; GFX12-NEXT: v_readlane_b32 s48, v23, 9
+; GFX12-NEXT: v_readlane_b32 s47, v23, 8
+; GFX12-NEXT: v_readlane_b32 s46, v23, 7
; GFX12-NEXT: v_readlane_b32 s37, v23, 6
; GFX12-NEXT: v_readlane_b32 s36, v23, 5
; GFX12-NEXT: v_readlane_b32 s35, v23, 4
@@ -817,60 +581,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX7-NEXT: v_writelane_b32 v21, s35, 4
; GFX7-NEXT: v_writelane_b32 v21, s36, 5
; GFX7-NEXT: v_writelane_b32 v21, s37, 6
-; GFX7-NEXT: v_writelane_b32 v21, s38, 7
-; GFX7-NEXT: v_writelane_b32 v21, s39, 8
-; GFX7-NEXT: v_writelane_b32 v21, s40, 9
-; GFX7-NEXT: v_writelane_b32 v21, s41, 10
-; GFX7-NEXT: v_writelane_b32 v21, s42, 11
-; GFX7-NEXT: v_writelane_b32 v21, s43, 12
-; GFX7-NEXT: v_writelane_b32 v21, s44, 13
-; GFX7-NEXT: v_writelane_b32 v21, s45, 14
-; GFX7-NEXT: v_writelane_b32 v21, s46, 15
-; GFX7-NEXT: v_writelane_b32 v21, s47, 16
-; GFX7-NEXT: v_writelane_b32 v21, s48, 17
-; GFX7-NEXT: v_writelane_b32 v21, s49, 18
-; GFX7-NEXT: v_writelane_b32 v21, s50, 19
-; GFX7-NEXT: v_writelane_b32 v21, s51, 20
-; GFX7-NEXT: v_writelane_b32 v21, s52, 21
-; GFX7-NEXT: v_writelane_b32 v21, s53, 22
-; GFX7-NEXT: v_writelane_b32 v21, s54, 23
-; GFX7-NEXT: v_writelane_b32 v21, s55, 24
-; GFX7-NEXT: v_writelane_b32 v21, s56, 25
-; GFX7-NEXT: v_writelane_b32 v21, s57, 26
+; GFX7-NEXT: v_writelane_b32 v21, s46, 7
+; GFX7-NEXT: v_writelane_b32 v21, s47, 8
+; GFX7-NEXT: v_writelane_b32 v21, s48, 9
+; GFX7-NEXT: v_writelane_b32 v21, s49, 10
+; GFX7-NEXT: v_writelane_b32 v21, s50, 11
+; GFX7-NEXT: v_writelane_b32 v21, s51, 12
+; GFX7-NEXT: v_writelane_b32 v21, s52, 13
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX7-NEXT: v_writelane_b32 v21, s58, 27
+; GFX7-NEXT: v_writelane_b32 v21, s53, 14
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX7-NEXT: ;;#ASMEND
; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32
; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22
-; GFX7-NEXT: v_writelane_b32 v21, s59, 28
; GFX7-NEXT: v_readfirstlane_b32 s59, v22
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
-; GFX7-NEXT: v_readlane_b32 s59, v21, 28
-; GFX7-NEXT: v_readlane_b32 s58, v21, 27
-; GFX7-NEXT: v_readlane_b32 s57, v21, 26
-; GFX7-NEXT: v_readlane_b32 s56, v21, 25
-; GFX7-NEXT: v_readlane_b32 s55, v21, 24
-; GFX7-NEXT: v_readlane_b32 s54, v21, 23
-; GFX7-NEXT: v_readlane_b32 s53, v21, 22
-; GFX7-NEXT: v_readlane_b32 s52, v21, 21
-; GFX7-NEXT: v_readlane_b32 s51, v21, 20
-; GFX7-NEXT: v_readlane_b32 s50, v21, 19
-; GFX7-NEXT: v_readlane_b32 s49, v21, 18
-; GFX7-NEXT: v_readlane_b32 s48, v21, 17
-; GFX7-NEXT: v_readlane_b32 s47, v21, 16
-; GFX7-NEXT: v_readlane_b32 s46, v21, 15
-; GFX7-NEXT: v_readlane_b32 s45, v21, 14
-; GFX7-NEXT: v_readlane_b32 s44, v21, 13
-; GFX7-NEXT: v_readlane_b32 s43, v21, 12
-; GFX7-NEXT: v_readlane_b32 s42, v21, 11
-; GFX7-NEXT: v_readlane_b32 s41, v21, 10
-; GFX7-NEXT: v_readlane_b32 s40, v21, 9
-; GFX7-NEXT: v_readlane_b32 s39, v21, 8
-; GFX7-NEXT: v_readlane_b32 s38, v21, 7
+; GFX7-NEXT: v_readlane_b32 s53, v21, 14
+; GFX7-NEXT: v_readlane_b32 s52, v21, 13
+; GFX7-NEXT: v_readlane_b32 s51, v21, 12
+; GFX7-NEXT: v_readlane_b32 s50, v21, 11
+; GFX7-NEXT: v_readlane_b32 s49, v21, 10
+; GFX7-NEXT: v_readlane_b32 s48, v21, 9
+; GFX7-NEXT: v_readlane_b32 s47, v21, 8
+; GFX7-NEXT: v_readlane_b32 s46, v21, 7
; GFX7-NEXT: v_readlane_b32 s37, v21, 6
; GFX7-NEXT: v_readlane_b32 s36, v21, 5
; GFX7-NEXT: v_readlane_b32 s35, v21, 4
@@ -899,60 +635,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX8-NEXT: v_writelane_b32 v21, s35, 4
; GFX8-NEXT: v_writelane_b32 v21, s36, 5
; GFX8-NEXT: v_writelane_b32 v21, s37, 6
-; GFX8-NEXT: v_writelane_b32 v21, s38, 7
-; GFX8-NEXT: v_writelane_b32 v21, s39, 8
-; GFX8-NEXT: v_writelane_b32 v21, s40, 9
-; GFX8-NEXT: v_writelane_b32 v21, s41, 10
-; GFX8-NEXT: v_writelane_b32 v21, s42, 11
-; GFX8-NEXT: v_writelane_b32 v21, s43, 12
-; GFX8-NEXT: v_writelane_b32 v21, s44, 13
-; GFX8-NEXT: v_writelane_b32 v21, s45, 14
-; GFX8-NEXT: v_writelane_b32 v21, s46, 15
-; GFX8-NEXT: v_writelane_b32 v21, s47, 16
-; GFX8-NEXT: v_writelane_b32 v21, s48, 17
-; GFX8-NEXT: v_writelane_b32 v21, s49, 18
-; GFX8-NEXT: v_writelane_b32 v21, s50, 19
-; GFX8-NEXT: v_writelane_b32 v21, s51, 20
-; GFX8-NEXT: v_writelane_b32 v21, s52, 21
-; GFX8-NEXT: v_writelane_b32 v21, s53, 22
-; GFX8-NEXT: v_writelane_b32 v21, s54, 23
-; GFX8-NEXT: v_writelane_b32 v21, s55, 24
-; GFX8-NEXT: v_writelane_b32 v21, s56, 25
-; GFX8-NEXT: v_writelane_b32 v21, s57, 26
+; GFX8-NEXT: v_writelane_b32 v21, s46, 7
+; GFX8-NEXT: v_writelane_b32 v21, s47, 8
+; GFX8-NEXT: v_writelane_b32 v21, s48, 9
+; GFX8-NEXT: v_writelane_b32 v21, s49, 10
+; GFX8-NEXT: v_writelane_b32 v21, s50, 11
+; GFX8-NEXT: v_writelane_b32 v21, s51, 12
+; GFX8-NEXT: v_writelane_b32 v21, s52, 13
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_writelane_b32 v21, s58, 27
+; GFX8-NEXT: v_writelane_b32 v21, s53, 14
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32
; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22
-; GFX8-NEXT: v_writelane_b32 v21, s59, 28
; GFX8-NEXT: v_readfirstlane_b32 s59, v22
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v21, 28
-; GFX8-NEXT: v_readlane_b32 s58, v21, 27
-; GFX8-NEXT: v_readlane_b32 s57, v21, 26
-; GFX8-NEXT: v_readlane_b32 s56, v21, 25
-; GFX8-NEXT: v_readlane_b32 s55, v21, 24
-; GFX8-NEXT: v_readlane_b32 s54, v21, 23
-; GFX8-NEXT: v_readlane_b32 s53, v21, 22
-; GFX8-NEXT: v_readlane_b32 s52, v21, 21
-; GFX8-NEXT: v_readlane_b32 s51, v21, 20
-; GFX8-NEXT: v_readlane_b32 s50, v21, 19
-; GFX8-NEXT: v_readlane_b32 s49, v21, 18
-; GFX8-NEXT: v_readlane_b32 s48, v21, 17
-; GFX8-NEXT: v_readlane_b32 s47, v21, 16
-; GFX8-NEXT: v_readlane_b32 s46, v21, 15
-; GFX8-NEXT: v_readlane_b32 s45, v21, 14
-; GFX8-NEXT: v_readlane_b32 s44, v21, 13
-; GFX8-NEXT: v_readlane_b32 s43, v21, 12
-; GFX8-NEXT: v_readlane_b32 s42, v21, 11
-; GFX8-NEXT: v_readlane_b32 s41, v21, 10
-; GFX8-NEXT: v_readlane_b32 s40, v21, 9
-; GFX8-NEXT: v_readlane_b32 s39, v21, 8
-; GFX8-NEXT: v_readlane_b32 s38, v21, 7
+; GFX8-NEXT: v_readlane_b32 s53, v21, 14
+; GFX8-NEXT: v_readlane_b32 s52, v21, 13
+; GFX8-NEXT: v_readlane_b32 s51, v21, 12
+; GFX8-NEXT: v_readlane_b32 s50, v21, 11
+; GFX8-NEXT: v_readlane_b32 s49, v21, 10
+; GFX8-NEXT: v_readlane_b32 s48, v21, 9
+; GFX8-NEXT: v_readlane_b32 s47, v21, 8
+; GFX8-NEXT: v_readlane_b32 s46, v21, 7
; GFX8-NEXT: v_readlane_b32 s37, v21, 6
; GFX8-NEXT: v_readlane_b32 s36, v21, 5
; GFX8-NEXT: v_readlane_b32 s35, v21, 4
@@ -981,60 +689,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX900-NEXT: v_writelane_b32 v21, s35, 4
; GFX900-NEXT: v_writelane_b32 v21, s36, 5
; GFX900-NEXT: v_writelane_b32 v21, s37, 6
-; GFX900-NEXT: v_writelane_b32 v21, s38, 7
-; GFX900-NEXT: v_writelane_b32 v21, s39, 8
-; GFX900-NEXT: v_writelane_b32 v21, s40, 9
-; GFX900-NEXT: v_writelane_b32 v21, s41, 10
-; GFX900-NEXT: v_writelane_b32 v21, s42, 11
-; GFX900-NEXT: v_writelane_b32 v21, s43, 12
-; GFX900-NEXT: v_writelane_b32 v21, s44, 13
-; GFX900-NEXT: v_writelane_b32 v21, s45, 14
-; GFX900-NEXT: v_writelane_b32 v21, s46, 15
-; GFX900-NEXT: v_writelane_b32 v21, s47, 16
-; GFX900-NEXT: v_writelane_b32 v21, s48, 17
-; GFX900-NEXT: v_writelane_b32 v21, s49, 18
-; GFX900-NEXT: v_writelane_b32 v21, s50, 19
-; GFX900-NEXT: v_writelane_b32 v21, s51, 20
-; GFX900-NEXT: v_writelane_b32 v21, s52, 21
-; GFX900-NEXT: v_writelane_b32 v21, s53, 22
-; GFX900-NEXT: v_writelane_b32 v21, s54, 23
-; GFX900-NEXT: v_writelane_b32 v21, s55, 24
-; GFX900-NEXT: v_writelane_b32 v21, s56, 25
-; GFX900-NEXT: v_writelane_b32 v21, s57, 26
+; GFX900-NEXT: v_writelane_b32 v21, s46, 7
+; GFX900-NEXT: v_writelane_b32 v21, s47, 8
+; GFX900-NEXT: v_writelane_b32 v21, s48, 9
+; GFX900-NEXT: v_writelane_b32 v21, s49, 10
+; GFX900-NEXT: v_writelane_b32 v21, s50, 11
+; GFX900-NEXT: v_writelane_b32 v21, s51, 12
+; GFX900-NEXT: v_writelane_b32 v21, s52, 13
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_writelane_b32 v21, s58, 27
+; GFX900-NEXT: v_writelane_b32 v21, s53, 14
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32
; GFX900-NEXT: v_add_u32_e32 v22, 16, v22
-; GFX900-NEXT: v_writelane_b32 v21, s59, 28
; GFX900-NEXT: v_readfirstlane_b32 s59, v22
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v21, 28
-; GFX900-NEXT: v_readlane_b32 s58, v21, 27
-; GFX900-NEXT: v_readlane_b32 s57, v21, 26
-; GFX900-NEXT: v_readlane_b32 s56, v21, 25
-; GFX900-NEXT: v_readlane_b32 s55, v21, 24
-; GFX900-NEXT: v_readlane_b32 s54, v21, 23
-; GFX900-NEXT: v_readlane_b32 s53, v21, 22
-; GFX900-NEXT: v_readlane_b32 s52, v21, 21
-; GFX900-NEXT: v_readlane_b32 s51, v21, 20
-; GFX900-NEXT: v_readlane_b32 s50, v21, 19
-; GFX900-NEXT: v_readlane_b32 s49, v21, 18
-; GFX900-NEXT: v_readlane_b32 s48, v21, 17
-; GFX900-NEXT: v_readlane_b32 s47, v21, 16
-; GFX900-NEXT: v_readlane_b32 s46, v21, 15
-; GFX900-NEXT: v_readlane_b32 s45, v21, 14
-; GFX900-NEXT: v_readlane_b32 s44, v21, 13
-; GFX900-NEXT: v_readlane_b32 s43, v21, 12
-; GFX900-NEXT: v_readlane_b32 s42, v21, 11
-; GFX900-NEXT: v_readlane_b32 s41, v21, 10
-; GFX900-NEXT: v_readlane_b32 s40, v21, 9
-; GFX900-NEXT: v_readlane_b32 s39, v21, 8
-; GFX900-NEXT: v_readlane_b32 s38, v21, 7
+; GFX900-NEXT: v_readlane_b32 s53, v21, 14
+; GFX900-NEXT: v_readlane_b32 s52, v21, 13
+; GFX900-NEXT: v_readlane_b32 s51, v21, 12
+; GFX900-NEXT: v_readlane_b32 s50, v21, 11
+; GFX900-NEXT: v_readlane_b32 s49, v21, 10
+; GFX900-NEXT: v_readlane_b32 s48, v21, 9
+; GFX900-NEXT: v_readlane_b32 s47, v21, 8
+; GFX900-NEXT: v_readlane_b32 s46, v21, 7
; GFX900-NEXT: v_readlane_b32 s37, v21, 6
; GFX900-NEXT: v_readlane_b32 s36, v21, 5
; GFX900-NEXT: v_readlane_b32 s35, v21, 4
@@ -1063,31 +743,15 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-NEXT: v_writelane_b32 v21, s35, 4
; GFX942-NEXT: v_writelane_b32 v21, s36, 5
; GFX942-NEXT: v_writelane_b32 v21, s37, 6
-; GFX942-NEXT: v_writelane_b32 v21, s38, 7
-; GFX942-NEXT: v_writelane_b32 v21, s39, 8
-; GFX942-NEXT: v_writelane_b32 v21, s40, 9
-; GFX942-NEXT: v_writelane_b32 v21, s41, 10
-; GFX942-NEXT: v_writelane_b32 v21, s42, 11
-; GFX942-NEXT: v_writelane_b32 v21, s43, 12
-; GFX942-NEXT: v_writelane_b32 v21, s44, 13
-; GFX942-NEXT: v_writelane_b32 v21, s45, 14
-; GFX942-NEXT: v_writelane_b32 v21, s46, 15
-; GFX942-NEXT: v_writelane_b32 v21, s47, 16
-; GFX942-NEXT: v_writelane_b32 v21, s48, 17
-; GFX942-NEXT: v_writelane_b32 v21, s49, 18
-; GFX942-NEXT: v_writelane_b32 v21, s50, 19
-; GFX942-NEXT: v_writelane_b32 v21, s51, 20
-; GFX942-NEXT: v_writelane_b32 v21, s52, 21
-; GFX942-NEXT: v_writelane_b32 v21, s53, 22
-; GFX942-NEXT: v_writelane_b32 v21, s54, 23
-; GFX942-NEXT: v_writelane_b32 v21, s55, 24
-; GFX942-NEXT: v_writelane_b32 v21, s56, 25
-; GFX942-NEXT: v_writelane_b32 v21, s57, 26
-; GFX942-NEXT: v_writelane_b32 v21, s58, 27
-; GFX942-NEXT: v_writelane_b32 v21, s59, 28
-; GFX942-NEXT: v_writelane_b32 v21, s60, 29
-; GFX942-NEXT: v_writelane_b32 v21, s61, 30
+; GFX942-NEXT: v_writelane_b32 v21, s46, 7
+; GFX942-NEXT: v_writelane_b32 v21, s47, 8
+; GFX942-NEXT: v_writelane_b32 v21, s48, 9
+; GFX942-NEXT: v_writelane_b32 v21, s49, 10
+; GFX942-NEXT: v_writelane_b32 v21, s50, 11
+; GFX942-NEXT: v_writelane_b32 v21, s51, 12
+; GFX942-NEXT: v_writelane_b32 v21, s52, 13
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
+; GFX942-NEXT: v_writelane_b32 v21, s53, 14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX942-NEXT: ;;#ASMEND
@@ -1098,30 +762,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s61, v21, 30
-; GFX942-NEXT: v_readlane_b32 s60, v21, 29
-; GFX942-NEXT: v_readlane_b32 s59, v21, 28
-; GFX942-NEXT: v_readlane_b32 s58, v21, 27
-; GFX942-NEXT: v_readlane_b32 s57, v21, 26
-; GFX942-NEXT: v_readlane_b32 s56, v21, 25
-; GFX942-NEXT: v_readlane_b32 s55, v21, 24
-; GFX942-NEXT: v_readlane_b32 s54, v21, 23
-; GFX942-NEXT: v_readlane_b32 s53, v21, 22
-; GFX942-NEXT: v_readlane_b32 s52, v21, 21
-; GFX942-NEXT: v_readlane_b32 s51, v21, 20
-; GFX942-NEXT: v_readlane_b32 s50, v21, 19
-; GFX942-NEXT: v_readlane_b32 s49, v21, 18
-; GFX942-NEXT: v_readlane_b32 s48, v21, 17
-; GFX942-NEXT: v_readlane_b32 s47, v21, 16
-; GFX942-NEXT: v_readlane_b32 s46, v21, 15
-; GFX942-NEXT: v_readlane_b32 s45, v21, 14
-; GFX942-NEXT: v_readlane_b32 s44, v21, 13
-; GFX942-NEXT: v_readlane_b32 s43, v21, 12
-; GFX942-NEXT: v_readlane_b32 s42, v21, 11
-; GFX942-NEXT: v_readlane_b32 s41, v21, 10
-; GFX942-NEXT: v_readlane_b32 s40, v21, 9
-; GFX942-NEXT: v_readlane_b32 s39, v21, 8
-; GFX942-NEXT: v_readlane_b32 s38, v21, 7
+; GFX942-NEXT: v_readlane_b32 s53, v21, 14
+; GFX942-NEXT: v_readlane_b32 s52, v21, 13
+; GFX942-NEXT: v_readlane_b32 s51, v21, 12
+; GFX942-NEXT: v_readlane_b32 s50, v21, 11
+; GFX942-NEXT: v_readlane_b32 s49, v21, 10
+; GFX942-NEXT: v_readlane_b32 s48, v21, 9
+; GFX942-NEXT: v_readlane_b32 s47, v21, 8
+; GFX942-NEXT: v_readlane_b32 s46, v21, 7
; GFX942-NEXT: v_readlane_b32 s37, v21, 6
; GFX942-NEXT: v_readlane_b32 s36, v21, 5
; GFX942-NEXT: v_readlane_b32 s35, v21, 4
@@ -1145,66 +793,38 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo
; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1
; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2
; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3
; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4
; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5
; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6
-; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7
-; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8
-; GFX10_1-NEXT: v_writelane_b32 v21, s40, 9
-; GFX10_1-NEXT: v_writelane_b32 v21, s41, 10
-; GFX10_1-NEXT: v_writelane_b32 v21, s42, 11
-; GFX10_1-NEXT: v_writelane_b32 v21, s43, 12
-; GFX10_1-NEXT: v_writelane_b32 v21, s44, 13
-; GFX10_1-NEXT: v_writelane_b32 v21, s45, 14
-; GFX10_1-NEXT: v_writelane_b32 v21, s46, 15
-; GFX10_1-NEXT: v_writelane_b32 v21, s47, 16
-; GFX10_1-NEXT: v_writelane_b32 v21, s48, 17
-; GFX10_1-NEXT: v_writelane_b32 v21, s49, 18
-; GFX10_1-NEXT: v_writelane_b32 v21, s50, 19
-; GFX10_1-NEXT: v_writelane_b32 v21, s51, 20
-; GFX10_1-NEXT: v_writelane_b32 v21, s52, 21
-; GFX10_1-NEXT: v_writelane_b32 v21, s53, 22
-; GFX10_1-NEXT: v_writelane_b32 v21, s54, 23
-; GFX10_1-NEXT: v_writelane_b32 v21, s55, 24
-; GFX10_1-NEXT: v_writelane_b32 v21, s56, 25
-; GFX10_1-NEXT: v_writelane_b32 v21, s57, 26
-; GFX10_1-NEXT: v_writelane_b32 v21, s58, 27
+; GFX10_1-NEXT: v_writelane_b32 v21, s46, 7
+; GFX10_1-NEXT: v_writelane_b32 v21, s47, 8
+; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32
-; GFX10_1-NEXT: v_writelane_b32 v21, s59, 28
-; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo
; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22
; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v21, 28
-; GFX10_1-NEXT: v_readlane_b32 s58, v21, 27
-; GFX10_1-NEXT: v_readlane_b32 s57, v21, 26
-; GFX10_1-NEXT: v_readlane_b32 s56, v21, 25
-; GFX10_1-NEXT: v_readlane_b32 s55, v21, 24
-; GFX10_1-NEXT: v_readlane_b32 s54, v21, 23
-; GFX10_1-NEXT: v_readlane_b32 s53, v21, 22
-; GFX10_1-NEXT: v_readlane_b32 s52, v21, 21
-; GFX10_1-NEXT: v_readlane_b32 s51, v21, 20
-; GFX10_1-NEXT: v_readlane_b32 s50, v21, 19
-; GFX10_1-NEXT: v_readlane_b32 s49, v21, 18
-; GFX10_1-NEXT: v_readlane_b32 s48, v21, 17
-; GFX10_1-NEXT: v_readlane_b32 s47, v21, 16
-; GFX10_1-NEXT: v_readlane_b32 s46, v21, 15
-; GFX10_1-NEXT: v_readlane_b32 s45, v21, 14
-; GFX10_1-NEXT: v_readlane_b32 s44, v21, 13
-; GFX10_1-NEXT: v_readlane_b32 s43, v21, 12
-; GFX10_1-NEXT: v_readlane_b32 s42, v21, 11
-; GFX10_1-NEXT: v_readlane_b32 s41, v21, 10
-; GFX10_1-NEXT: v_readlane_b32 s40, v21, 9
-; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8
-; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_1-NEXT: v_readlane_b32 s47, v21, 8
+; GFX10_1-NEXT: v_readlane_b32 s46, v21, 7
; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6
; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5
; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4
@@ -1228,66 +848,38 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo
; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1
; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2
; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3
; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4
; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5
; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6
-; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7
-; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8
-; GFX10_3-NEXT: v_writelane_b32 v21, s40, 9
-; GFX10_3-NEXT: v_writelane_b32 v21, s41, 10
-; GFX10_3-NEXT: v_writelane_b32 v21, s42, 11
-; GFX10_3-NEXT: v_writelane_b32 v21, s43, 12
-; GFX10_3-NEXT: v_writelane_b32 v21, s44, 13
-; GFX10_3-NEXT: v_writelane_b32 v21, s45, 14
-; GFX10_3-NEXT: v_writelane_b32 v21, s46, 15
-; GFX10_3-NEXT: v_writelane_b32 v21, s47, 16
-; GFX10_3-NEXT: v_writelane_b32 v21, s48, 17
-; GFX10_3-NEXT: v_writelane_b32 v21, s49, 18
-; GFX10_3-NEXT: v_writelane_b32 v21, s50, 19
-; GFX10_3-NEXT: v_writelane_b32 v21, s51, 20
-; GFX10_3-NEXT: v_writelane_b32 v21, s52, 21
-; GFX10_3-NEXT: v_writelane_b32 v21, s53, 22
-; GFX10_3-NEXT: v_writelane_b32 v21, s54, 23
-; GFX10_3-NEXT: v_writelane_b32 v21, s55, 24
-; GFX10_3-NEXT: v_writelane_b32 v21, s56, 25
-; GFX10_3-NEXT: v_writelane_b32 v21, s57, 26
-; GFX10_3-NEXT: v_writelane_b32 v21, s58, 27
+; GFX10_3-NEXT: v_writelane_b32 v21, s46, 7
+; GFX10_3-NEXT: v_writelane_b32 v21, s47, 8
+; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32
-; GFX10_3-NEXT: v_writelane_b32 v21, s59, 28
-; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo
; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22
; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v21, 28
-; GFX10_3-NEXT: v_readlane_b32 s58, v21, 27
-; GFX10_3-NEXT: v_readlane_b32 s57, v21, 26
-; GFX10_3-NEXT: v_readlane_b32 s56, v21, 25
-; GFX10_3-NEXT: v_readlane_b32 s55, v21, 24
-; GFX10_3-NEXT: v_readlane_b32 s54, v21, 23
-; GFX10_3-NEXT: v_readlane_b32 s53, v21, 22
-; GFX10_3-NEXT: v_readlane_b32 s52, v21, 21
-; GFX10_3-NEXT: v_readlane_b32 s51, v21, 20
-; GFX10_3-NEXT: v_readlane_b32 s50, v21, 19
-; GFX10_3-NEXT: v_readlane_b32 s49, v21, 18
-; GFX10_3-NEXT: v_readlane_b32 s48, v21, 17
-; GFX10_3-NEXT: v_readlane_b32 s47, v21, 16
-; GFX10_3-NEXT: v_readlane_b32 s46, v21, 15
-; GFX10_3-NEXT: v_readlane_b32 s45, v21, 14
-; GFX10_3-NEXT: v_readlane_b32 s44, v21, 13
-; GFX10_3-NEXT: v_readlane_b32 s43, v21, 12
-; GFX10_3-NEXT: v_readlane_b32 s42, v21, 11
-; GFX10_3-NEXT: v_readlane_b32 s41, v21, 10
-; GFX10_3-NEXT: v_readlane_b32 s40, v21, 9
-; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8
-; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_3-NEXT: v_readlane_b32 s47, v21, 8
+; GFX10_3-NEXT: v_readlane_b32 s46, v21, 7
; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6
; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5
; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4
@@ -1310,72 +902,40 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v21, s30, 0
+; GFX11-NEXT: s_and_b32 s59, 0, exec_lo
; GFX11-NEXT: v_writelane_b32 v21, s31, 1
; GFX11-NEXT: v_writelane_b32 v21, s33, 2
; GFX11-NEXT: v_writelane_b32 v21, s34, 3
; GFX11-NEXT: v_writelane_b32 v21, s35, 4
; GFX11-NEXT: v_writelane_b32 v21, s36, 5
; GFX11-NEXT: v_writelane_b32 v21, s37, 6
-; GFX11-NEXT: v_writelane_b32 v21, s38, 7
-; GFX11-NEXT: v_writelane_b32 v21, s39, 8
-; GFX11-NEXT: v_writelane_b32 v21, s40, 9
-; GFX11-NEXT: v_writelane_b32 v21, s41, 10
-; GFX11-NEXT: v_writelane_b32 v21, s42, 11
-; GFX11-NEXT: v_writelane_b32 v21, s43, 12
-; GFX11-NEXT: v_writelane_b32 v21, s44, 13
-; GFX11-NEXT: v_writelane_b32 v21, s45, 14
-; GFX11-NEXT: v_writelane_b32 v21, s46, 15
-; GFX11-NEXT: v_writelane_b32 v21, s47, 16
-; GFX11-NEXT: v_writelane_b32 v21, s48, 17
-; GFX11-NEXT: v_writelane_b32 v21, s49, 18
-; GFX11-NEXT: v_writelane_b32 v21, s50, 19
-; GFX11-NEXT: v_writelane_b32 v21, s51, 20
-; GFX11-NEXT: v_writelane_b32 v21, s52, 21
-; GFX11-NEXT: v_writelane_b32 v21, s53, 22
-; GFX11-NEXT: v_writelane_b32 v21, s54, 23
-; GFX11-NEXT: v_writelane_b32 v21, s55, 24
-; GFX11-NEXT: v_writelane_b32 v21, s56, 25
-; GFX11-NEXT: v_writelane_b32 v21, s57, 26
-; GFX11-NEXT: v_writelane_b32 v21, s58, 27
+; GFX11-NEXT: v_writelane_b32 v21, s46, 7
+; GFX11-NEXT: v_writelane_b32 v21, s47, 8
+; GFX11-NEXT: v_writelane_b32 v21, s48, 9
+; GFX11-NEXT: v_writelane_b32 v21, s49, 10
+; GFX11-NEXT: v_writelane_b32 v21, s50, 11
+; GFX11-NEXT: v_writelane_b32 v21, s51, 12
+; GFX11-NEXT: v_writelane_b32 v21, s52, 13
+; GFX11-NEXT: v_writelane_b32 v21, s53, 14
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_writelane_b32 v21, s59, 28
-; GFX11-NEXT: s_and_b32 s59, 0, exec_lo
-; GFX11-NEXT: s_addc_u32 s32, s32, 16
+; GFX11-NEXT: s_addc_u32 s60, s32, 16
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_bitcmp1_b32 s32, 0
-; GFX11-NEXT: s_bitset0_b32 s32, 0
-; GFX11-NEXT: s_mov_b32 s59, s32
-; GFX11-NEXT: s_addc_u32 s32, s32, -16
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_bitcmp1_b32 s32, 0
-; GFX11-NEXT: s_bitset0_b32 s32, 0
+; GFX11-NEXT: s_bitcmp1_b32 s60, 0
+; GFX11-NEXT: s_bitset0_b32 s60, 0
+; GFX11-NEXT: s_mov_b32 s59, s60
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v21, 28
-; GFX11-NEXT: v_readlane_b32 s58, v21, 27
-; GFX11-NEXT: v_readlane_b32 s57, v21, 26
-; GFX11-NEXT: v_readlane_b32 s56, v21, 25
-; GFX11-NEXT: v_readlane_b32 s55, v21, 24
-; GFX11-NEXT: v_readlane_b32 s54, v21, 23
-; GFX11-NEXT: v_readlane_b32 s53, v21, 22
-; GFX11-NEXT: v_readlane_b32 s52, v21, 21
-; GFX11-NEXT: v_readlane_b32 s51, v21, 20
-; GFX11-NEXT: v_readlane_b32 s50, v21, 19
-; GFX11-NEXT: v_readlane_b32 s49, v21, 18
-; GFX11-NEXT: v_readlane_b32 s48, v21, 17
-; GFX11-NEXT: v_readlane_b32 s47, v21, 16
-; GFX11-NEXT: v_readlane_b32 s46, v21, 15
-; GFX11-NEXT: v_readlane_b32 s45, v21, 14
-; GFX11-NEXT: v_readlane_b32 s44, v21, 13
-; GFX11-NEXT: v_readlane_b32 s43, v21, 12
-; GFX11-NEXT: v_readlane_b32 s42, v21, 11
-; GFX11-NEXT: v_readlane_b32 s41, v21, 10
-; GFX11-NEXT: v_readlane_b32 s40, v21, 9
-; GFX11-NEXT: v_readlane_b32 s39, v21, 8
-; GFX11-NEXT: v_readlane_b32 s38, v21, 7
+; GFX11-NEXT: v_readlane_b32 s53, v21, 14
+; GFX11-NEXT: v_readlane_b32 s52, v21, 13
+; GFX11-NEXT: v_readlane_b32 s51, v21, 12
+; GFX11-NEXT: v_readlane_b32 s50, v21, 11
+; GFX11-NEXT: v_readlane_b32 s49, v21, 10
+; GFX11-NEXT: v_readlane_b32 s48, v21, 9
+; GFX11-NEXT: v_readlane_b32 s47, v21, 8
+; GFX11-NEXT: v_readlane_b32 s46, v21, 7
; GFX11-NEXT: v_readlane_b32 s37, v21, 6
; GFX11-NEXT: v_readlane_b32 s36, v21, 5
; GFX11-NEXT: v_readlane_b32 s35, v21, 4
@@ -1402,65 +962,37 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v21, s30, 0
+; GFX12-NEXT: s_and_b32 s59, 0, exec_lo
; GFX12-NEXT: v_writelane_b32 v21, s31, 1
; GFX12-NEXT: v_writelane_b32 v21, s33, 2
; GFX12-NEXT: v_writelane_b32 v21, s34, 3
; GFX12-NEXT: v_writelane_b32 v21, s35, 4
; GFX12-NEXT: v_writelane_b32 v21, s36, 5
; GFX12-NEXT: v_writelane_b32 v21, s37, 6
-; GFX12-NEXT: v_writelane_b32 v21, s38, 7
-; GFX12-NEXT: v_writelane_b32 v21, s39, 8
-; GFX12-NEXT: v_writelane_b32 v21, s40, 9
-; GFX12-NEXT: v_writelane_b32 v21, s41, 10
-; GFX12-NEXT: v_writelane_b32 v21, s42, 11
-; GFX12-NEXT: v_writelane_b32 v21, s43, 12
-; GFX12-NEXT: v_writelane_b32 v21, s44, 13
-; GFX12-NEXT: v_writelane_b32 v21, s45, 14
-; GFX12-NEXT: v_writelane_b32 v21, s46, 15
-; GFX12-NEXT: v_writelane_b32 v21, s47, 16
-; GFX12-NEXT: v_writelane_b32 v21, s48, 17
-; GFX12-NEXT: v_writelane_b32 v21, s49, 18
-; GFX12-NEXT: v_writelane_b32 v21, s50, 19
-; GFX12-NEXT: v_writelane_b32 v21, s51, 20
-; GFX12-NEXT: v_writelane_b32 v21, s52, 21
-; GFX12-NEXT: v_writelane_b32 v21, s53, 22
-; GFX12-NEXT: v_writelane_b32 v21, s54, 23
-; GFX12-NEXT: v_writelane_b32 v21, s55, 24
-; GFX12-NEXT: v_writelane_b32 v21, s56, 25
-; GFX12-NEXT: v_writelane_b32 v21, s57, 26
-; GFX12-NEXT: v_writelane_b32 v21, s58, 27
+; GFX12-NEXT: v_writelane_b32 v21, s46, 7
+; GFX12-NEXT: v_writelane_b32 v21, s47, 8
+; GFX12-NEXT: v_writelane_b32 v21, s48, 9
+; GFX12-NEXT: v_writelane_b32 v21, s49, 10
+; GFX12-NEXT: v_writelane_b32 v21, s50, 11
+; GFX12-NEXT: v_writelane_b32 v21, s51, 12
+; GFX12-NEXT: v_writelane_b32 v21, s52, 13
+; GFX12-NEXT: v_writelane_b32 v21, s53, 14
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_writelane_b32 v21, s59, 28
-; GFX12-NEXT: s_and_b32 s59, 0, exec_lo
; GFX12-NEXT: s_mov_b32 s59, s32
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_readlane_b32 s59, v21, 28
-; GFX12-NEXT: v_readlane_b32 s58, v21, 27
-; GFX12-NEXT: v_readlane_b32 s57, v21, 26
-; GFX12-NEXT: v_readlane_b32 s56, v21, 25
-; GFX12-NEXT: v_readlane_b32 s55, v21, 24
-; GFX12-NEXT: v_readlane_b32 s54, v21, 23
-; GFX12-NEXT: v_readlane_b32 s53, v21, 22
-; GFX12-NEXT: v_readlane_b32 s52, v21, 21
-; GFX12-NEXT: v_readlane_b32 s51, v21, 20
-; GFX12-NEXT: v_readlane_b32 s50, v21, 19
-; GFX12-NEXT: v_readlane_b32 s49, v21, 18
-; GFX12-NEXT: v_readlane_b32 s48, v21, 17
-; GFX12-NEXT: v_readlane_b32 s47, v21, 16
-; GFX12-NEXT: v_readlane_b32 s46, v21, 15
-; GFX12-NEXT: v_readlane_b32 s45, v21, 14
-; GFX12-NEXT: v_readlane_b32 s44, v21, 13
-; GFX12-NEXT: v_readlane_b32 s43, v21, 12
-; GFX12-NEXT: v_readlane_b32 s42, v21, 11
-; GFX12-NEXT: v_readlane_b32 s41, v21, 10
-; GFX12-NEXT: v_readlane_b32 s40, v21, 9
-; GFX12-NEXT: v_readlane_b32 s39, v21, 8
-; GFX12-NEXT: v_readlane_b32 s38, v21, 7
+; GFX12-NEXT: v_readlane_b32 s53, v21, 14
+; GFX12-NEXT: v_readlane_b32 s52, v21, 13
+; GFX12-NEXT: v_readlane_b32 s51, v21, 12
+; GFX12-NEXT: v_readlane_b32 s50, v21, 11
+; GFX12-NEXT: v_readlane_b32 s49, v21, 10
+; GFX12-NEXT: v_readlane_b32 s48, v21, 9
+; GFX12-NEXT: v_readlane_b32 s47, v21, 8
+; GFX12-NEXT: v_readlane_b32 s46, v21, 7
; GFX12-NEXT: v_readlane_b32 s37, v21, 6
; GFX12-NEXT: v_readlane_b32 s36, v21, 5
; GFX12-NEXT: v_readlane_b32 s35, v21, 4
@@ -1523,8 +1055,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: s_add_i32 s6, s32, 0x201100
; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
-; GFX7-NEXT: v_writelane_b32 v23, s28, 28
-; GFX7-NEXT: v_writelane_b32 v23, s29, 29
+; GFX7-NEXT: v_writelane_b32 v23, s28, 15
+; GFX7-NEXT: v_writelane_b32 v23, s29, 16
; GFX7-NEXT: v_writelane_b32 v23, s30, 0
; GFX7-NEXT: v_writelane_b32 v23, s31, 1
; GFX7-NEXT: v_writelane_b32 v23, s33, 2
@@ -1532,34 +1064,21 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: v_writelane_b32 v23, s35, 4
; GFX7-NEXT: v_writelane_b32 v23, s36, 5
; GFX7-NEXT: v_writelane_b32 v23, s37, 6
-; GFX7-NEXT: v_writelane_b32 v23, s38, 7
-; GFX7-NEXT: v_writelane_b32 v23, s39, 8
-; GFX7-NEXT: v_writelane_b32 v23, s40, 9
-; GFX7-NEXT: v_writelane_b32 v23, s41, 10
-; GFX7-NEXT: v_writelane_b32 v23, s42, 11
-; GFX7-NEXT: v_writelane_b32 v23, s43, 12
-; GFX7-NEXT: v_writelane_b32 v23, s44, 13
-; GFX7-NEXT: v_writelane_b32 v23, s45, 14
-; GFX7-NEXT: v_writelane_b32 v23, s46, 15
-; GFX7-NEXT: v_writelane_b32 v23, s47, 16
-; GFX7-NEXT: v_writelane_b32 v23, s48, 17
-; GFX7-NEXT: v_writelane_b32 v23, s49, 18
-; GFX7-NEXT: v_writelane_b32 v23, s50, 19
-; GFX7-NEXT: v_writelane_b32 v23, s51, 20
-; GFX7-NEXT: v_writelane_b32 v23, s52, 21
-; GFX7-NEXT: v_writelane_b32 v23, s53, 22
-; GFX7-NEXT: v_writelane_b32 v23, s54, 23
-; GFX7-NEXT: v_writelane_b32 v23, s55, 24
+; GFX7-NEXT: v_writelane_b32 v23, s46, 7
+; GFX7-NEXT: v_writelane_b32 v23, s47, 8
+; GFX7-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-NEXT: v_writelane_b32 v23, s50, 11
; GFX7-NEXT: s_lshr_b32 s5, s32, 6
-; GFX7-NEXT: v_writelane_b32 v23, s56, 25
+; GFX7-NEXT: v_writelane_b32 v23, s51, 12
; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
; GFX7-NEXT: s_add_i32 s4, s5, 0x4240
; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
-; GFX7-NEXT: v_writelane_b32 v23, s57, 26
+; GFX7-NEXT: v_writelane_b32 v23, s52, 13
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
; GFX7-NEXT: v_writelane_b32 v22, s4, 0
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX7-NEXT: v_writelane_b32 v23, s59, 27
+; GFX7-NEXT: v_writelane_b32 v23, s53, 14
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use alloca0 v0
; GFX7-NEXT: ;;#ASMEND
@@ -1570,27 +1089,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
-; GFX7-NEXT: v_readlane_b32 s59, v23, 27
-; GFX7-NEXT: v_readlane_b32 s57, v23, 26
-; GFX7-NEXT: v_readlane_b32 s56, v23, 25
-; GFX7-NEXT: v_readlane_b32 s55, v23, 24
-; GFX7-NEXT: v_readlane_b32 s54, v23, 23
-; GFX7-NEXT: v_readlane_b32 s53, v23, 22
-; GFX7-NEXT: v_readlane_b32 s52, v23, 21
-; GFX7-NEXT: v_readlane_b32 s51, v23, 20
-; GFX7-NEXT: v_readlane_b32 s50, v23, 19
-; GFX7-NEXT: v_readlane_b32 s49, v23, 18
-; GFX7-NEXT: v_readlane_b32 s48, v23, 17
-; GFX7-NEXT: v_readlane_b32 s47, v23, 16
-; GFX7-NEXT: v_readlane_b32 s46, v23, 15
-; GFX7-NEXT: v_readlane_b32 s45, v23, 14
-; GFX7-NEXT: v_readlane_b32 s44, v23, 13
-; GFX7-NEXT: v_readlane_b32 s43, v23, 12
-; GFX7-NEXT: v_readlane_b32 s42, v23, 11
-; GFX7-NEXT: v_readlane_b32 s41, v23, 10
-; GFX7-NEXT: v_readlane_b32 s40, v23, 9
-; GFX7-NEXT: v_readlane_b32 s39, v23, 8
-; GFX7-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-NEXT: v_readlane_b32 s47, v23, 8
+; GFX7-NEXT: v_readlane_b32 s46, v23, 7
; GFX7-NEXT: v_readlane_b32 s37, v23, 6
; GFX7-NEXT: v_readlane_b32 s36, v23, 5
; GFX7-NEXT: v_readlane_b32 s35, v23, 4
@@ -1598,8 +1104,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: v_readlane_b32 s33, v23, 2
; GFX7-NEXT: v_readlane_b32 s31, v23, 1
; GFX7-NEXT: v_readlane_b32 s30, v23, 0
-; GFX7-NEXT: v_readlane_b32 s28, v23, 28
-; GFX7-NEXT: v_readlane_b32 s29, v23, 29
+; GFX7-NEXT: v_readlane_b32 s28, v23, 15
+; GFX7-NEXT: v_readlane_b32 s29, v23, 16
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
@@ -1623,32 +1129,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: v_writelane_b32 v22, s35, 4
; GFX8-NEXT: v_writelane_b32 v22, s36, 5
; GFX8-NEXT: v_writelane_b32 v22, s37, 6
-; GFX8-NEXT: v_writelane_b32 v22, s38, 7
-; GFX8-NEXT: v_writelane_b32 v22, s39, 8
-; GFX8-NEXT: v_writelane_b32 v22, s40, 9
-; GFX8-NEXT: v_writelane_b32 v22, s41, 10
-; GFX8-NEXT: v_writelane_b32 v22, s42, 11
-; GFX8-NEXT: v_writelane_b32 v22, s43, 12
-; GFX8-NEXT: v_writelane_b32 v22, s44, 13
-; GFX8-NEXT: v_writelane_b32 v22, s45, 14
-; GFX8-NEXT: v_writelane_b32 v22, s46, 15
-; GFX8-NEXT: v_writelane_b32 v22, s47, 16
-; GFX8-NEXT: v_writelane_b32 v22, s48, 17
-; GFX8-NEXT: v_writelane_b32 v22, s49, 18
-; GFX8-NEXT: v_writelane_b32 v22, s50, 19
-; GFX8-NEXT: v_writelane_b32 v22, s51, 20
-; GFX8-NEXT: v_writelane_b32 v22, s52, 21
-; GFX8-NEXT: v_writelane_b32 v22, s53, 22
-; GFX8-NEXT: v_writelane_b32 v22, s54, 23
-; GFX8-NEXT: v_writelane_b32 v22, s55, 24
-; GFX8-NEXT: v_writelane_b32 v22, s56, 25
-; GFX8-NEXT: v_writelane_b32 v22, s57, 26
+; GFX8-NEXT: v_writelane_b32 v22, s46, 7
+; GFX8-NEXT: v_writelane_b32 v22, s47, 8
+; GFX8-NEXT: v_writelane_b32 v22, s48, 9
+; GFX8-NEXT: v_writelane_b32 v22, s49, 10
+; GFX8-NEXT: v_writelane_b32 v22, s50, 11
; GFX8-NEXT: s_lshr_b32 s4, s32, 6
-; GFX8-NEXT: v_writelane_b32 v22, s59, 27
+; GFX8-NEXT: v_writelane_b32 v22, s51, 12
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_add_i32 s59, s4, 0x4240
+; GFX8-NEXT: v_writelane_b32 v22, s52, 13
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT: v_writelane_b32 v22, s53, 14
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
@@ -1658,27 +1151,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: v_readlane_b32 s59, v22, 27
-; GFX8-NEXT: v_readlane_b32 s57, v22, 26
-; GFX8-NEXT: v_readlane_b32 s56, v22, 25
-; GFX8-NEXT: v_readlane_b32 s55, v22, 24
-; GFX8-NEXT: v_readlane_b32 s54, v22, 23
-; GFX8-NEXT: v_readlane_b32 s53, v22, 22
-; GFX8-NEXT: v_readlane_b32 s52, v22, 21
-; GFX8-NEXT: v_readlane_b32 s51, v22, 20
-; GFX8-NEXT: v_readlane_b32 s50, v22, 19
-; GFX8-NEXT: v_readlane_b32 s49, v22, 18
-; GFX8-NEXT: v_readlane_b32 s48, v22, 17
-; GFX8-NEXT: v_readlane_b32 s47, v22, 16
-; GFX8-NEXT: v_readlane_b32 s46, v22, 15
-; GFX8-NEXT: v_readlane_b32 s45, v22, 14
-; GFX8-NEXT: v_readlane_b32 s44, v22, 13
-; GFX8-NEXT: v_readlane_b32 s43, v22, 12
-; GFX8-NEXT: v_readlane_b32 s42, v22, 11
-; GFX8-NEXT: v_readlane_b32 s41, v22, 10
-; GFX8-NEXT: v_readlane_b32 s40, v22, 9
-; GFX8-NEXT: v_readlane_b32 s39, v22, 8
-; GFX8-NEXT: v_readlane_b32 s38, v22, 7
+; GFX8-NEXT: v_readlane_b32 s53, v22, 14
+; GFX8-NEXT: v_readlane_b32 s52, v22, 13
+; GFX8-NEXT: v_readlane_b32 s51, v22, 12
+; GFX8-NEXT: v_readlane_b32 s50, v22, 11
+; GFX8-NEXT: v_readlane_b32 s49, v22, 10
+; GFX8-NEXT: v_readlane_b32 s48, v22, 9
+; GFX8-NEXT: v_readlane_b32 s47, v22, 8
+; GFX8-NEXT: v_readlane_b32 s46, v22, 7
; GFX8-NEXT: v_readlane_b32 s37, v22, 6
; GFX8-NEXT: v_readlane_b32 s36, v22, 5
; GFX8-NEXT: v_readlane_b32 s35, v22, 4
@@ -1707,32 +1187,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX900-NEXT: v_writelane_b32 v22, s35, 4
; GFX900-NEXT: v_writelane_b32 v22, s36, 5
; GFX900-NEXT: v_writelane_b32 v22, s37, 6
-; GFX900-NEXT: v_writelane_b32 v22, s38, 7
-; GFX900-NEXT: v_writelane_b32 v22, s39, 8
-; GFX900-NEXT: v_writelane_b32 v22, s40, 9
-; GFX900-NEXT: v_writelane_b32 v22, s41, 10
-; GFX900-NEXT: v_writelane_b32 v22, s42, 11
-; GFX900-NEXT: v_writelane_b32 v22, s43, 12
-; GFX900-NEXT: v_writelane_b32 v22, s44, 13
-; GFX900-NEXT: v_writelane_b32 v22, s45, 14
-; GFX900-NEXT: v_writelane_b32 v22, s46, 15
-; GFX900-NEXT: v_writelane_b32 v22, s47, 16
-; GFX900-NEXT: v_writelane_b32 v22, s48, 17
-; GFX900-NEXT: v_writelane_b32 v22, s49, 18
-; GFX900-NEXT: v_writelane_b32 v22, s50, 19
-; GFX900-NEXT: v_writelane_b32 v22, s51, 20
-; GFX900-NEXT: v_writelane_b32 v22, s52, 21
-; GFX900-NEXT: v_writelane_b32 v22, s53, 22
-; GFX900-NEXT: v_writelane_b32 v22, s54, 23
-; GFX900-NEXT: v_writelane_b32 v22, s55, 24
-; GFX900-NEXT: v_writelane_b32 v22, s56, 25
-; GFX900-NEXT: v_writelane_b32 v22, s57, 26
+; GFX900-NEXT: v_writelane_b32 v22, s46, 7
+; GFX900-NEXT: v_writelane_b32 v22, s47, 8
+; GFX900-NEXT: v_writelane_b32 v22, s48, 9
+; GFX900-NEXT: v_writelane_b32 v22, s49, 10
+; GFX900-NEXT: v_writelane_b32 v22, s50, 11
; GFX900-NEXT: s_lshr_b32 s4, s32, 6
-; GFX900-NEXT: v_writelane_b32 v22, s59, 27
+; GFX900-NEXT: v_writelane_b32 v22, s51, 12
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: s_add_i32 s59, s4, 0x4240
+; GFX900-NEXT: v_writelane_b32 v22, s52, 13
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT: v_writelane_b32 v22, s53, 14
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
@@ -1742,27 +1209,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s59, v22, 27
-; GFX900-NEXT: v_readlane_b32 s57, v22, 26
-; GFX900-NEXT: v_readlane_b32 s56, v22, 25
-; GFX900-NEXT: v_readlane_b32 s55, v22, 24
-; GFX900-NEXT: v_readlane_b32 s54, v22, 23
-; GFX900-NEXT: v_readlane_b32 s53, v22, 22
-; GFX900-NEXT: v_readlane_b32 s52, v22, 21
-; GFX900-NEXT: v_readlane_b32 s51, v22, 20
-; GFX900-NEXT: v_readlane_b32 s50, v22, 19
-; GFX900-NEXT: v_readlane_b32 s49, v22, 18
-; GFX900-NEXT: v_readlane_b32 s48, v22, 17
-; GFX900-NEXT: v_readlane_b32 s47, v22, 16
-; GFX900-NEXT: v_readlane_b32 s46, v22, 15
-; GFX900-NEXT: v_readlane_b32 s45, v22, 14
-; GFX900-NEXT: v_readlane_b32 s44, v22, 13
-; GFX900-NEXT: v_readlane_b32 s43, v22, 12
-; GFX900-NEXT: v_readlane_b32 s42, v22, 11
-; GFX900-NEXT: v_readlane_b32 s41, v22, 10
-; GFX900-NEXT: v_readlane_b32 s40, v22, 9
-; GFX900-NEXT: v_readlane_b32 s39, v22, 8
-; GFX900-NEXT: v_readlane_b32 s38, v22, 7
+; GFX900-NEXT: v_readlane_b32 s53, v22, 14
+; GFX900-NEXT: v_readlane_b32 s52, v22, 13
+; GFX900-NEXT: v_readlane_b32 s51, v22, 12
+; GFX900-NEXT: v_readlane_b32 s50, v22, 11
+; GFX900-NEXT: v_readlane_b32 s49, v22, 10
+; GFX900-NEXT: v_readlane_b32 s48, v22, 9
+; GFX900-NEXT: v_readlane_b32 s47, v22, 8
+; GFX900-NEXT: v_readlane_b32 s46, v22, 7
; GFX900-NEXT: v_readlane_b32 s37, v22, 6
; GFX900-NEXT: v_readlane_b32 s36, v22, 5
; GFX900-NEXT: v_readlane_b32 s35, v22, 4
@@ -1791,30 +1245,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-NEXT: v_writelane_b32 v22, s35, 4
; GFX942-NEXT: v_writelane_b32 v22, s36, 5
; GFX942-NEXT: v_writelane_b32 v22, s37, 6
-; GFX942-NEXT: v_writelane_b32 v22, s38, 7
-; GFX942-NEXT: v_writelane_b32 v22, s39, 8
-; GFX942-NEXT: v_writelane_b32 v22, s40, 9
-; GFX942-NEXT: v_writelane_b32 v22, s41, 10
-; GFX942-NEXT: v_writelane_b32 v22, s42, 11
-; GFX942-NEXT: v_writelane_b32 v22, s43, 12
-; GFX942-NEXT: v_writelane_b32 v22, s44, 13
-; GFX942-NEXT: v_writelane_b32 v22, s45, 14
-; GFX942-NEXT: v_writelane_b32 v22, s46, 15
-; GFX942-NEXT: v_writelane_b32 v22, s47, 16
-; GFX942-NEXT: v_writelane_b32 v22, s48, 17
-; GFX942-NEXT: v_writelane_b32 v22, s49, 18
-; GFX942-NEXT: v_writelane_b32 v22, s50, 19
-; GFX942-NEXT: v_writelane_b32 v22, s51, 20
-; GFX942-NEXT: v_writelane_b32 v22, s52, 21
-; GFX942-NEXT: v_writelane_b32 v22, s53, 22
-; GFX942-NEXT: v_writelane_b32 v22, s54, 23
-; GFX942-NEXT: v_writelane_b32 v22, s55, 24
-; GFX942-NEXT: v_writelane_b32 v22, s56, 25
-; GFX942-NEXT: v_writelane_b32 v22, s57, 26
+; GFX942-NEXT: v_writelane_b32 v22, s46, 7
+; GFX942-NEXT: v_writelane_b32 v22, s47, 8
+; GFX942-NEXT: v_writelane_b32 v22, s48, 9
+; GFX942-NEXT: v_writelane_b32 v22, s49, 10
+; GFX942-NEXT: v_writelane_b32 v22, s50, 11
+; GFX942-NEXT: v_writelane_b32 v22, s51, 12
; GFX942-NEXT: s_add_i32 s0, s32, 64
-; GFX942-NEXT: v_writelane_b32 v22, s59, 27
+; GFX942-NEXT: v_writelane_b32 v22, s52, 13
; GFX942-NEXT: v_mov_b32_e32 v0, s0
-; GFX942-NEXT: v_writelane_b32 v22, s60, 28
+; GFX942-NEXT: v_writelane_b32 v22, s53, 14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
@@ -1822,34 +1262,18 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_add_i32 s59, s32, 0x4240
-; GFX942-NEXT: v_writelane_b32 v22, s61, 29
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s61, v22, 29
-; GFX942-NEXT: v_readlane_b32 s60, v22, 28
-; GFX942-NEXT: v_readlane_b32 s59, v22, 27
-; GFX942-NEXT: v_readlane_b32 s57, v22, 26
-; GFX942-NEXT: v_readlane_b32 s56, v22, 25
-; GFX942-NEXT: v_readlane_b32 s55, v22, 24
-; GFX942-NEXT: v_readlane_b32 s54, v22, 23
-; GFX942-NEXT: v_readlane_b32 s53, v22, 22
-; GFX942-NEXT: v_readlane_b32 s52, v22, 21
-; GFX942-NEXT: v_readlane_b32 s51, v22, 20
-; GFX942-NEXT: v_readlane_b32 s50, v22, 19
-; GFX942-NEXT: v_readlane_b32 s49, v22, 18
-; GFX942-NEXT: v_readlane_b32 s48, v22, 17
-; GFX942-NEXT: v_readlane_b32 s47, v22, 16
-; GFX942-NEXT: v_readlane_b32 s46, v22, 15
-; GFX942-NEXT: v_readlane_b32 s45, v22, 14
-; GFX942-NEXT: v_readlane_b32 s44, v22, 13
-; GFX942-NEXT: v_readlane_b32 s43, v22, 12
-; GFX942-NEXT: v_readlane_b32 s42, v22, 11
-; GFX942-NEXT: v_readlane_b32 s41, v22, 10
-; GFX942-NEXT: v_readlane_b32 s40, v22, 9
-; GFX942-NEXT: v_readlane_b32 s39, v22, 8
-; GFX942-NEXT: v_readlane_b32 s38, v22, 7
+; GFX942-NEXT: v_readlane_b32 s53, v22, 14
+; GFX942-NEXT: v_readlane_b32 s52, v22, 13
+; GFX942-NEXT: v_readlane_b32 s51, v22, 12
+; GFX942-NEXT: v_readlane_b32 s50, v22, 11
+; GFX942-NEXT: v_readlane_b32 s49, v22, 10
+; GFX942-NEXT: v_readlane_b32 s48, v22, 9
+; GFX942-NEXT: v_readlane_b32 s47, v22, 8
+; GFX942-NEXT: v_readlane_b32 s46, v22, 7
; GFX942-NEXT: v_readlane_b32 s37, v22, 6
; GFX942-NEXT: v_readlane_b32 s36, v22, 5
; GFX942-NEXT: v_readlane_b32 s35, v22, 4
@@ -1875,8 +1299,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0
; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240
; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1
; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use alloca0 v0
; GFX10_1-NEXT: ;;#ASMEND
@@ -1885,56 +1311,28 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4
; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5
; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6
-; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7
-; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8
-; GFX10_1-NEXT: v_writelane_b32 v22, s40, 9
-; GFX10_1-NEXT: v_writelane_b32 v22, s41, 10
-; GFX10_1-NEXT: v_writelane_b32 v22, s42, 11
-; GFX10_1-NEXT: v_writelane_b32 v22, s43, 12
-; GFX10_1-NEXT: v_writelane_b32 v22, s44, 13
-; GFX10_1-NEXT: v_writelane_b32 v22, s45, 14
-; GFX10_1-NEXT: v_writelane_b32 v22, s46, 15
-; GFX10_1-NEXT: v_writelane_b32 v22, s47, 16
-; GFX10_1-NEXT: v_writelane_b32 v22, s48, 17
-; GFX10_1-NEXT: v_writelane_b32 v22, s49, 18
-; GFX10_1-NEXT: v_writelane_b32 v22, s50, 19
-; GFX10_1-NEXT: v_writelane_b32 v22, s51, 20
-; GFX10_1-NEXT: v_writelane_b32 v22, s52, 21
-; GFX10_1-NEXT: v_writelane_b32 v22, s53, 22
-; GFX10_1-NEXT: v_writelane_b32 v22, s54, 23
-; GFX10_1-NEXT: v_writelane_b32 v22, s55, 24
-; GFX10_1-NEXT: v_writelane_b32 v22, s56, 25
-; GFX10_1-NEXT: v_writelane_b32 v22, s57, 26
-; GFX10_1-NEXT: v_writelane_b32 v22, s59, 27
-; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240
-; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-NEXT: v_writelane_b32 v22, s46, 7
+; GFX10_1-NEXT: v_writelane_b32 v22, s47, 8
+; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
-; GFX10_1-NEXT: v_readlane_b32 s59, v22, 27
-; GFX10_1-NEXT: v_readlane_b32 s57, v22, 26
-; GFX10_1-NEXT: v_readlane_b32 s56, v22, 25
-; GFX10_1-NEXT: v_readlane_b32 s55, v22, 24
-; GFX10_1-NEXT: v_readlane_b32 s54, v22, 23
-; GFX10_1-NEXT: v_readlane_b32 s53, v22, 22
-; GFX10_1-NEXT: v_readlane_b32 s52, v22, 21
-; GFX10_1-NEXT: v_readlane_b32 s51, v22, 20
-; GFX10_1-NEXT: v_readlane_b32 s50, v22, 19
-; GFX10_1-NEXT: v_readlane_b32 s49, v22, 18
-; GFX10_1-NEXT: v_readlane_b32 s48, v22, 17
-; GFX10_1-NEXT: v_readlane_b32 s47, v22, 16
-; GFX10_1-NEXT: v_readlane_b32 s46, v22, 15
-; GFX10_1-NEXT: v_readlane_b32 s45, v22, 14
-; GFX10_1-NEXT: v_readlane_b32 s44, v22, 13
-; GFX10_1-NEXT: v_readlane_b32 s43, v22, 12
-; GFX10_1-NEXT: v_readlane_b32 s42, v22, 11
-; GFX10_1-NEXT: v_readlane_b32 s41, v22, 10
-; GFX10_1-NEXT: v_readlane_b32 s40, v22, 9
-; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8
-; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_1-NEXT: v_readlane_b32 s47, v22, 8
+; GFX10_1-NEXT: v_readlane_b32 s46, v22, 7
; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6
; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5
; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4
@@ -1960,8 +1358,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0
; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32
; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240
; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1
; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use alloca0 v0
; GFX10_3-NEXT: ;;#ASMEND
@@ -1970,56 +1370,28 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4
; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5
; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6
-; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7
-; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8
-; GFX10_3-NEXT: v_writelane_b32 v22, s40, 9
-; GFX10_3-NEXT: v_writelane_b32 v22, s41, 10
-; GFX10_3-NEXT: v_writelane_b32 v22, s42, 11
-; GFX10_3-NEXT: v_writelane_b32 v22, s43, 12
-; GFX10_3-NEXT: v_writelane_b32 v22, s44, 13
-; GFX10_3-NEXT: v_writelane_b32 v22, s45, 14
-; GFX10_3-NEXT: v_writelane_b32 v22, s46, 15
-; GFX10_3-NEXT: v_writelane_b32 v22, s47, 16
-; GFX10_3-NEXT: v_writelane_b32 v22, s48, 17
-; GFX10_3-NEXT: v_writelane_b32 v22, s49, 18
-; GFX10_3-NEXT: v_writelane_b32 v22, s50, 19
-; GFX10_3-NEXT: v_writelane_b32 v22, s51, 20
-; GFX10_3-NEXT: v_writelane_b32 v22, s52, 21
-; GFX10_3-NEXT: v_writelane_b32 v22, s53, 22
-; GFX10_3-NEXT: v_writelane_b32 v22, s54, 23
-; GFX10_3-NEXT: v_writelane_b32 v22, s55, 24
-; GFX10_3-NEXT: v_writelane_b32 v22, s56, 25
-; GFX10_3-NEXT: v_writelane_b32 v22, s57, 26
-; GFX10_3-NEXT: v_writelane_b32 v22, s59, 27
-; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240
-; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-NEXT: v_writelane_b32 v22, s46, 7
+; GFX10_3-NEXT: v_writelane_b32 v22, s47, 8
+; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
-; GFX10_3-NEXT: v_readlane_b32 s59, v22, 27
-; GFX10_3-NEXT: v_readlane_b32 s57, v22, 26
-; GFX10_3-NEXT: v_readlane_b32 s56, v22, 25
-; GFX10_3-NEXT: v_readlane_b32 s55, v22, 24
-; GFX10_3-NEXT: v_readlane_b32 s54, v22, 23
-; GFX10_3-NEXT: v_readlane_b32 s53, v22, 22
-; GFX10_3-NEXT: v_readlane_b32 s52, v22, 21
-; GFX10_3-NEXT: v_readlane_b32 s51, v22, 20
-; GFX10_3-NEXT: v_readlane_b32 s50, v22, 19
-; GFX10_3-NEXT: v_readlane_b32 s49, v22, 18
-; GFX10_3-NEXT: v_readlane_b32 s48, v22, 17
-; GFX10_3-NEXT: v_readlane_b32 s47, v22, 16
-; GFX10_3-NEXT: v_readlane_b32 s46, v22, 15
-; GFX10_3-NEXT: v_readlane_b32 s45, v22, 14
-; GFX10_3-NEXT: v_readlane_b32 s44, v22, 13
-; GFX10_3-NEXT: v_readlane_b32 s43, v22, 12
-; GFX10_3-NEXT: v_readlane_b32 s42, v22, 11
-; GFX10_3-NEXT: v_readlane_b32 s41, v22, 10
-; GFX10_3-NEXT: v_readlane_b32 s40, v22, 9
-; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8
-; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_3-NEXT: v_readlane_b32 s47, v22, 8
+; GFX10_3-NEXT: v_readlane_b32 s46, v22, 7
; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6
; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5
; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4
@@ -2043,67 +1415,41 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v22, s30, 0
; GFX11-NEXT: s_add_i32 s0, s32, 64
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: s_add_i32 s59, s32, 0x4240
; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-NEXT: v_writelane_b32 v22, s31, 1
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use alloca0 v0
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_writelane_b32 v22, s31, 1
; GFX11-NEXT: v_writelane_b32 v22, s33, 2
; GFX11-NEXT: v_writelane_b32 v22, s34, 3
; GFX11-NEXT: v_writelane_b32 v22, s35, 4
; GFX11-NEXT: v_writelane_b32 v22, s36, 5
; GFX11-NEXT: v_writelane_b32 v22, s37, 6
-; GFX11-NEXT: v_writelane_b32 v22, s38, 7
-; GFX11-NEXT: v_writelane_b32 v22, s39, 8
-; GFX11-NEXT: v_writelane_b32 v22, s40, 9
-; GFX11-NEXT: v_writelane_b32 v22, s41, 10
-; GFX11-NEXT: v_writelane_b32 v22, s42, 11
-; GFX11-NEXT: v_writelane_b32 v22, s43, 12
-; GFX11-NEXT: v_writelane_b32 v22, s44, 13
-; GFX11-NEXT: v_writelane_b32 v22, s45, 14
-; GFX11-NEXT: v_writelane_b32 v22, s46, 15
-; GFX11-NEXT: v_writelane_b32 v22, s47, 16
-; GFX11-NEXT: v_writelane_b32 v22, s48, 17
-; GFX11-NEXT: v_writelane_b32 v22, s49, 18
-; GFX11-NEXT: v_writelane_b32 v22, s50, 19
-; GFX11-NEXT: v_writelane_b32 v22, s51, 20
-; GFX11-NEXT: v_writelane_b32 v22, s52, 21
-; GFX11-NEXT: v_writelane_b32 v22, s53, 22
-; GFX11-NEXT: v_writelane_b32 v22, s54, 23
-; GFX11-NEXT: v_writelane_b32 v22, s55, 24
-; GFX11-NEXT: v_writelane_b32 v22, s56, 25
-; GFX11-NEXT: v_writelane_b32 v22, s57, 26
-; GFX11-NEXT: v_writelane_b32 v22, s59, 27
-; GFX11-NEXT: s_add_i32 s59, s32, 0x4240
-; GFX11-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-NEXT: v_writelane_b32 v22, s46, 7
+; GFX11-NEXT: v_writelane_b32 v22, s47, 8
+; GFX11-NEXT: v_writelane_b32 v22, s48, 9
+; GFX11-NEXT: v_writelane_b32 v22, s49, 10
+; GFX11-NEXT: v_writelane_b32 v22, s50, 11
+; GFX11-NEXT: v_writelane_b32 v22, s51, 12
+; GFX11-NEXT: v_writelane_b32 v22, s52, 13
+; GFX11-NEXT: v_writelane_b32 v22, s53, 14
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
-; GFX11-NEXT: v_readlane_b32 s59, v22, 27
-; GFX11-NEXT: v_readlane_b32 s57, v22, 26
-; GFX11-NEXT: v_readlane_b32 s56, v22, 25
-; GFX11-NEXT: v_readlane_b32 s55, v22, 24
-; GFX11-NEXT: v_readlane_b32 s54, v22, 23
-; GFX11-NEXT: v_readlane_b32 s53, v22, 22
-; GFX11-NEXT: v_readlane_b32 s52, v22, 21
-; GFX11-NEXT: v_readlane_b32 s51, v22, 20
-; GFX11-NEXT: v_readlane_b32 s50, v22, 19
-; GFX11-NEXT: v_readlane_b32 s49, v22, 18
-; GFX11-NEXT: v_readlane_b32 s48, v22, 17
-; GFX11-NEXT: v_readlane_b32 s47, v22, 16
-; GFX11-NEXT: v_readlane_b32 s46, v22, 15
-; GFX11-NEXT: v_readlane_b32 s45, v22, 14
-; GFX11-NEXT: v_readlane_b32 s44, v22, 13
-; GFX11-NEXT: v_readlane_b32 s43, v22, 12
-; GFX11-NEXT: v_readlane_b32 s42, v22, 11
-; GFX11-NEXT: v_readlane_b32 s41, v22, 10
-; GFX11-NEXT: v_readlane_b32 s40, v22, 9
-; GFX11-NEXT: v_readlane_b32 s39, v22, 8
-; GFX11-NEXT: v_readlane_b32 s38, v22, 7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_readlane_b32 s53, v22, 14
+; GFX11-NEXT: v_readlane_b32 s52, v22, 13
+; GFX11-NEXT: v_readlane_b32 s51, v22, 12
+; GFX11-NEXT: v_readlane_b32 s50, v22, 11
+; GFX11-NEXT: v_readlane_b32 s49, v22, 10
+; GFX11-NEXT: v_readlane_b32 s48, v22, 9
+; GFX11-NEXT: v_readlane_b32 s47, v22, 8
+; GFX11-NEXT: v_readlane_b32 s46, v22, 7
; GFX11-NEXT: v_readlane_b32 s37, v22, 6
; GFX11-NEXT: v_readlane_b32 s36, v22, 5
; GFX11-NEXT: v_readlane_b32 s35, v22, 4
@@ -2130,7 +1476,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v22, s30, 0
+; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200
; GFX12-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use alloca0 v0
; GFX12-NEXT: ;;#ASMEND
@@ -2140,56 +1488,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: v_writelane_b32 v22, s35, 4
; GFX12-NEXT: v_writelane_b32 v22, s36, 5
; GFX12-NEXT: v_writelane_b32 v22, s37, 6
-; GFX12-NEXT: v_writelane_b32 v22, s38, 7
-; GFX12-NEXT: v_writelane_b32 v22, s39, 8
-; GFX12-NEXT: v_writelane_b32 v22, s40, 9
-; GFX12-NEXT: v_writelane_b32 v22, s41, 10
-; GFX12-NEXT: v_writelane_b32 v22, s42, 11
-; GFX12-NEXT: v_writelane_b32 v22, s43, 12
-; GFX12-NEXT: v_writelane_b32 v22, s44, 13
-; GFX12-NEXT: v_writelane_b32 v22, s45, 14
-; GFX12-NEXT: v_writelane_b32 v22, s46, 15
-; GFX12-NEXT: v_writelane_b32 v22, s47, 16
-; GFX12-NEXT: v_writelane_b32 v22, s48, 17
-; GFX12-NEXT: v_writelane_b32 v22, s49, 18
-; GFX12-NEXT: v_writelane_b32 v22, s50, 19
-; GFX12-NEXT: v_writelane_b32 v22, s51, 20
-; GFX12-NEXT: v_writelane_b32 v22, s52, 21
-; GFX12-NEXT: v_writelane_b32 v22, s53, 22
-; GFX12-NEXT: v_writelane_b32 v22, s54, 23
-; GFX12-NEXT: v_writelane_b32 v22, s55, 24
-; GFX12-NEXT: v_writelane_b32 v22, s56, 25
-; GFX12-NEXT: v_writelane_b32 v22, s57, 26
-; GFX12-NEXT: v_writelane_b32 v22, s59, 27
-; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200
-; GFX12-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-NEXT: v_writelane_b32 v22, s46, 7
+; GFX12-NEXT: v_writelane_b32 v22, s47, 8
+; GFX12-NEXT: v_writelane_b32 v22, s48, 9
+; GFX12-NEXT: v_writelane_b32 v22, s49, 10
+; GFX12-NEXT: v_writelane_b32 v22, s50, 11
+; GFX12-NEXT: v_writelane_b32 v22, s51, 12
+; GFX12-NEXT: v_writelane_b32 v22, s52, 13
+; GFX12-NEXT: v_writelane_b32 v22, s53, 14
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
-; GFX12-NEXT: v_readlane_b32 s59, v22, 27
-; GFX12-NEXT: v_readlane_b32 s57, v22, 26
-; GFX12-NEXT: v_readlane_b32 s56, v22, 25
-; GFX12-NEXT: v_readlane_b32 s55, v22, 24
-; GFX12-NEXT: v_readlane_b32 s54, v22, 23
-; GFX12-NEXT: v_readlane_b32 s53, v22, 22
-; GFX12-NEXT: v_readlane_b32 s52, v22, 21
-; GFX12-NEXT: v_readlane_b32 s51, v22, 20
-; GFX12-NEXT: v_readlane_b32 s50, v22, 19
-; GFX12-NEXT: v_readlane_b32 s49, v22, 18
-; GFX12-NEXT: v_readlane_b32 s48, v22, 17
-; GFX12-NEXT: v_readlane_b32 s47, v22, 16
-; GFX12-NEXT: v_readlane_b32 s46, v22, 15
-; GFX12-NEXT: v_readlane_b32 s45, v22, 14
-; GFX12-NEXT: v_readlane_b32 s44, v22, 13
-; GFX12-NEXT: v_readlane_b32 s43, v22, 12
-; GFX12-NEXT: v_readlane_b32 s42, v22, 11
-; GFX12-NEXT: v_readlane_b32 s41, v22, 10
-; GFX12-NEXT: v_readlane_b32 s40, v22, 9
-; GFX12-NEXT: v_readlane_b32 s39, v22, 8
-; GFX12-NEXT: v_readlane_b32 s38, v22, 7
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_readlane_b32 s53, v22, 14
+; GFX12-NEXT: v_readlane_b32 s52, v22, 13
+; GFX12-NEXT: v_readlane_b32 s51, v22, 12
+; GFX12-NEXT: v_readlane_b32 s50, v22, 11
+; GFX12-NEXT: v_readlane_b32 s49, v22, 10
+; GFX12-NEXT: v_readlane_b32 s48, v22, 9
+; GFX12-NEXT: v_readlane_b32 s47, v22, 8
+; GFX12-NEXT: v_readlane_b32 s46, v22, 7
; GFX12-NEXT: v_readlane_b32 s37, v22, 6
; GFX12-NEXT: v_readlane_b32 s36, v22, 5
; GFX12-NEXT: v_readlane_b32 s35, v22, 4
diff --git a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll
index 0112453e32bfc..790b934c2b1bf 100644
--- a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll
+++ b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll
@@ -8,7 +8,7 @@
; CHECK-LABEL: I_Quit:
; CHECK: .set I_Quit.num_vgpr, max(41, amdgpu.max_num_vgpr)
; CHECK: .set I_Quit.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: .set I_Quit.numbered_sgpr, max(48, amdgpu.max_num_sgpr)
+; CHECK: .set I_Quit.numbered_sgpr, max(64, amdgpu.max_num_sgpr)
; CHECK: .set I_Quit.private_seg_size, 16
; CHECK: .set I_Quit.uses_vcc, 1
; CHECK: .set I_Quit.uses_flat_scratch, 1
@@ -80,7 +80,7 @@ define void @P_SetThingPosition() {
; CHECK-LABEL: P_SetupPsprites:
; CHECK: .set P_SetupPsprites.num_vgpr, max(41, amdgpu.max_num_vgpr)
; CHECK: .set P_SetupPsprites.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: .set P_SetupPsprites.numbered_sgpr, max(48, amdgpu.max_num_sgpr)
+; CHECK: .set P_SetupPsprites.numbered_sgpr, max(64, amdgpu.max_num_sgpr)
; CHECK: .set P_SetupPsprites.private_seg_size, 16
; CHECK: .set P_SetupPsprites.uses_vcc, 1
; CHECK: .set P_SetupPsprites.uses_flat_scratch, 1
@@ -110,7 +110,7 @@ define void @HU_Start() {
; CHECK-LABEL: P_SpawnPlayer:
; CHECK: .set P_SpawnPlayer.num_vgpr, max(43, G_PlayerReborn.num_vgpr, P_SetThingPosition.num_vgpr, P_SetupPsprites.num_vgpr, HU_Start.num_vgpr)
; CHECK: .set P_SpawnPlayer.num_agpr, max(0, G_PlayerReborn.num_agpr, P_SetThingPosition.num_agpr, P_SetupPsprites.num_agpr, HU_Start.num_agpr)
-; CHECK: .set P_SpawnPlayer.numbered_sgpr, max(60, G_PlayerReborn.numbered_sgpr, P_SetThingPosition.numbered_sgpr, P_SetupPsprites.numbered_sgpr, HU_Start.numbered_sgpr)
+; CHECK: .set P_SpawnPlayer.numbered_sgpr, max(84, G_PlayerReborn.numbered_sgpr, P_SetThingPosition.numbered_sgpr, P_SetupPsprites.numbered_sgpr, HU_Start.numbered_sgpr)
; CHECK: .set P_SpawnPlayer.private_seg_size, 16+(max(G_PlayerReborn.private_seg_size, P_SetThingPosition.private_seg_size, P_SetupPsprites.private_seg_size, HU_Start.private_seg_size))
; CHECK: .set P_SpawnPlayer.uses_vcc, or(1, G_PlayerReborn.uses_vcc, P_SetThingPosition.uses_vcc, P_SetupPsprites.uses_vcc, HU_Start.uses_vcc)
; CHECK: .set P_SpawnPlayer.uses_flat_scratch, or(0, G_PlayerReborn.uses_flat_scratch, P_SetThingPosition.uses_flat_scratch, P_SetupPsprites.uses_flat_scratch, HU_Start.uses_flat_scratch)
@@ -128,7 +128,7 @@ define void @P_SpawnPlayer() {
; CHECK-LABEL: I_Error:
; CHECK: .set I_Error.num_vgpr, max(41, amdgpu.max_num_vgpr)
; CHECK: .set I_Error.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: .set I_Error.numbered_sgpr, max(48, amdgpu.max_num_sgpr)
+; CHECK: .set I_Error.numbered_sgpr, max(64, amdgpu.max_num_sgpr)
; CHECK: .set I_Error.private_seg_size, 16
; CHECK: .set I_Error.uses_vcc, 1
; CHECK: .set I_Error.uses_flat_scratch, 1
@@ -144,7 +144,7 @@ define void @I_Error(...) {
; CHECK-LABEL: G_DoReborn:
; CHECK: .set G_DoReborn.num_vgpr, max(44, P_RemoveMobj.num_vgpr, P_SpawnMobj.num_vgpr, P_SpawnPlayer.num_vgpr, I_Error.num_vgpr)
; CHECK: .set G_DoReborn.num_agpr, max(0, P_RemoveMobj.num_agpr, P_SpawnMobj.num_agpr, P_SpawnPlayer.num_agpr, I_Error.num_agpr)
-; CHECK: .set G_DoReborn.numbered_sgpr, max(72, P_RemoveMobj.numbered_sgpr, P_SpawnMobj.numbered_sgpr, P_SpawnPlayer.numbered_sgpr, I_Error.numbered_sgpr)
+; CHECK: .set G_DoReborn.numbered_sgpr, max(104, P_RemoveMobj.numbered_sgpr, P_SpawnMobj.numbered_sgpr, P_SpawnPlayer.numbered_sgpr, I_Error.numbered_sgpr)
; CHECK: .set G_DoReborn.private_seg_size, 32+(max(P_RemoveMobj.private_seg_size, P_SpawnMobj.private_seg_size, P_SpawnPlayer.private_seg_size, I_Error.private_seg_size))
; CHECK: .set G_DoReborn.uses_vcc, or(1, P_RemoveMobj.uses_vcc, P_SpawnMobj.uses_vcc, P_SpawnPlayer.uses_vcc, I_Error.uses_vcc)
; CHECK: .set G_DoReborn.uses_flat_scratch, or(0, P_RemoveMobj.uses_flat_scratch, P_SpawnMobj.uses_flat_scratch, P_SpawnPlayer.uses_flat_scratch, I_Error.uses_flat_scratch)
@@ -218,7 +218,7 @@ define void @F_Ticker() {
; CHECK-LABEL: G_CheckDemoStatus:
; CHECK: .set G_CheckDemoStatus.num_vgpr, max(43, I_Quit.num_vgpr, D_AdvanceDemo.num_vgpr, I_Error.num_vgpr)
; CHECK: .set G_CheckDemoStatus.num_agpr, max(0, I_Quit.num_agpr, D_AdvanceDemo.num_agpr, I_Error.num_agpr)
-; CHECK: .set G_CheckDemoStatus.numbered_sgpr, max(60, I_Quit.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, I_Error.numbered_sgpr)
+; CHECK: .set G_CheckDemoStatus.numbered_sgpr, max(84, I_Quit.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, I_Error.numbered_sgpr)
; CHECK: .set G_CheckDemoStatus.private_seg_size, 32+(max(I_Quit.private_seg_size, D_AdvanceDemo.private_seg_size, I_Error.private_seg_size))
; CHECK: .set G_CheckDemoStatus.uses_vcc, or(1, I_Quit.uses_vcc, D_AdvanceDemo.uses_vcc, I_Error.uses_vcc)
; CHECK: .set G_CheckDemoStatus.uses_flat_scratch, or(0, I_Quit.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, I_Error.uses_flat_scratch)
@@ -264,7 +264,7 @@ define ptr @P_SaveGameFile() {
; CHECK-LABEL: R_FlatNumForName:
; CHECK: .set R_FlatNumForName.num_vgpr, max(42, I_Error.num_vgpr)
; CHECK: .set R_FlatNumForName.num_agpr, max(0, I_Error.num_agpr)
-; CHECK: .set R_FlatNumForName.numbered_sgpr, max(48, I_Error.numbered_sgpr)
+; CHECK: .set R_FlatNumForName.numbered_sgpr, max(64, I_Error.numbered_sgpr)
; CHECK: .set R_FlatNumForName.private_seg_size, 16+(max(I_Error.private_seg_size))
; CHECK: .set R_FlatNumForName.uses_vcc, or(1, I_Error.uses_vcc)
; CHECK: .set R_FlatNumForName.uses_flat_scratch, or(0, I_Error.uses_flat_scratch)
@@ -279,7 +279,7 @@ define i32 @R_FlatNumForName() {
; CHECK-LABEL: R_TextureNumForName:
; CHECK: .set R_TextureNumForName.num_vgpr, max(42, R_FlatNumForName.num_vgpr)
; CHECK: .set R_TextureNumForName.num_agpr, max(0, R_FlatNumForName.num_agpr)
-; CHECK: .set R_TextureNumForName.numbered_sgpr, max(48, R_FlatNumForName.numbered_sgpr)
+; CHECK: .set R_TextureNumForName.numbered_sgpr, max(64, R_FlatNumForName.numbered_sgpr)
; CHECK: .set R_TextureNumForName.private_seg_size, 16+(max(R_FlatNumForName.private_seg_size))
; CHECK: .set R_TextureNumForName.uses_vcc, or(1, R_FlatNumForName.uses_vcc)
; CHECK: .set R_TextureNumForName.uses_flat_scratch, or(0, R_FlatNumForName.uses_flat_scratch)
@@ -292,10 +292,10 @@ define i32 @R_TextureNumForName() {
}
; CHECK-LABEL: G_Ticker:
-; CHECK: .set G_Ticker.num_vgpr, max(46, G_DoReborn.num_vgpr, F_Ticker.num_vgpr, AM_Stop.num_vgpr, F_StartFinale.num_vgpr, D_AdvanceDemo.num_vgpr, R_FlatNumForName.num_vgpr, R_TextureNumForName.num_vgpr, P_TempSaveGameFile.num_vgpr, P_SaveGameFile.num_vgpr, I_Error.num_vgpr)
+; CHECK: .set G_Ticker.num_vgpr, max(47, G_DoReborn.num_vgpr, F_Ticker.num_vgpr, AM_Stop.num_vgpr, F_StartFinale.num_vgpr, D_AdvanceDemo.num_vgpr, R_FlatNumForName.num_vgpr, R_TextureNumForName.num_vgpr, P_TempSaveGameFile.num_vgpr, P_SaveGameFile.num_vgpr, I_Error.num_vgpr)
; CHECK: .set G_Ticker.num_agpr, max(0, G_DoReborn.num_agpr, F_Ticker.num_agpr, AM_Stop.num_agpr, F_StartFinale.num_agpr, D_AdvanceDemo.num_agpr, R_FlatNumForName.num_agpr, R_TextureNumForName.num_agpr, P_TempSaveGameFile.num_agpr, P_SaveGameFile.num_agpr, I_Error.num_agpr)
-; CHECK: .set G_Ticker.numbered_sgpr, max(84, G_DoReborn.numbered_sgpr, F_Ticker.numbered_sgpr, AM_Stop.numbered_sgpr, F_StartFinale.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, R_FlatNumForName.numbered_sgpr, R_TextureNumForName.numbered_sgpr, P_TempSaveGameFile.numbered_sgpr, P_SaveGameFile.numbered_sgpr, I_Error.numbered_sgpr)
-; CHECK: .set G_Ticker.private_seg_size, 32+(max(G_DoReborn.private_seg_size, F_Ticker.private_seg_size, AM_Stop.private_seg_size, F_StartFinale.private_seg_size, D_AdvanceDemo.private_seg_size, R_FlatNumForName.private_seg_size, R_TextureNumForName.private_seg_size, P_TempSaveGameFile.private_seg_size, P_SaveGameFile.private_seg_size, I_Error.private_seg_size))
+; CHECK: .set G_Ticker.numbered_sgpr, max(105, G_DoReborn.numbered_sgpr, F_Ticker.numbered_sgpr, AM_Stop.numbered_sgpr, F_StartFinale.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, R_FlatNumForName.numbered_sgpr, R_TextureNumForName.numbered_sgpr, P_TempSaveGameFile.numbered_sgpr, P_SaveGameFile.numbered_sgpr, I_Error.numbered_sgpr)
+; CHECK: .set G_Ticker.private_seg_size, 48+(max(G_DoReborn.private_seg_size, F_Ticker.private_seg_size, AM_Stop.private_seg_size, F_StartFinale.private_seg_size, D_AdvanceDemo.private_seg_size, R_FlatNumForName.private_seg_size, R_TextureNumForName.private_seg_size, P_TempSaveGameFile.private_seg_size, P_SaveGameFile.private_seg_size, I_Error.private_seg_size))
; CHECK: .set G_Ticker.uses_vcc, or(1, G_DoReborn.uses_vcc, F_Ticker.uses_vcc, AM_Stop.uses_vcc, F_StartFinale.uses_vcc, D_AdvanceDemo.uses_vcc, R_FlatNumForName.uses_vcc, R_TextureNumForName.uses_vcc, P_TempSaveGameFile.uses_vcc, P_SaveGameFile.uses_vcc, I_Error.uses_vcc)
; CHECK: .set G_Ticker.uses_flat_scratch, or(0, G_DoReborn.uses_flat_scratch, F_Ticker.uses_flat_scratch, AM_Stop.uses_flat_scratch, F_StartFinale.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, R_FlatNumForName.uses_flat_scratch, R_TextureNumForName.uses_flat_scratch, P_TempSaveGameFile.uses_flat_scratch, P_SaveGameFile.uses_flat_scratch, I_Error.uses_flat_scratch)
; CHECK: .set G_Ticker.has_dyn_sized_stack, or(0, G_DoReborn.has_dyn_sized_stack, F_Ticker.has_dyn_sized_stack, AM_Stop.has_dyn_sized_stack, F_StartFinale.has_dyn_sized_stack, D_AdvanceDemo.has_dyn_sized_stack, R_FlatNumForName.has_dyn_sized_stack, R_TextureNumForName.has_dyn_sized_stack, P_TempSaveGameFile.has_dyn_sized_stack, P_SaveGameFile.has_dyn_sized_stack, I_Error.has_dyn_sized_stack)
@@ -316,9 +316,9 @@ define void @G_Ticker() {
}
; CHECK-LABEL: RunTic:
-; CHECK: .set RunTic.num_vgpr, max(46, G_CheckDemoStatus.num_vgpr, D_AdvanceDemo.num_vgpr, G_Ticker.num_vgpr)
+; CHECK: .set RunTic.num_vgpr, max(47, G_CheckDemoStatus.num_vgpr, D_AdvanceDemo.num_vgpr, G_Ticker.num_vgpr)
; CHECK: .set RunTic.num_agpr, max(0, G_CheckDemoStatus.num_agpr, D_AdvanceDemo.num_agpr, G_Ticker.num_agpr)
-; CHECK: .set RunTic.numbered_sgpr, max(84, G_CheckDemoStatus.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, G_Ticker.numbered_sgpr)
+; CHECK: .set RunTic.numbered_sgpr, max(105, G_CheckDemoStatus.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, G_Ticker.numbered_sgpr)
; CHECK: .set RunTic.private_seg_size, 32+(max(G_CheckDemoStatus.private_seg_size, D_AdvanceDemo.private_seg_size, G_Ticker.private_seg_size))
; CHECK: .set RunTic.uses_vcc, or(1, G_CheckDemoStatus.uses_vcc, D_AdvanceDemo.uses_vcc, G_Ticker.uses_vcc)
; CHECK: .set RunTic.uses_flat_scratch, or(0, G_CheckDemoStatus.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, G_Ticker.uses_flat_scratch)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
index ba6524caf668d..8e957c1c31013 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
@@ -27,39 +27,25 @@ body: |
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
- ; CHECK: liveins: $vgpr1, $vgpr2
+ ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr4 = COPY $sgpr33
+ ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
- ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
+ ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
- ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc
- ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384
+ ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
- ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
- ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; CHECK-NEXT: $sgpr33 = COPY $sgpr4
+ ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
+ ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -89,36 +75,24 @@ body: |
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
- ; CHECK: liveins: $sgpr29, $vgpr1, $vgpr2
+ ; CHECK: liveins: $sgpr29, $sgpr38, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2
+ ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
- ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc
- ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
+ ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384
+ ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
- ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr38
; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@@ -158,16 +132,12 @@ body: |
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
- ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc
- ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr38 = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr38 = S_MOV_B32 16384
+ ; CHECK-NEXT: $vgpr2, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr2, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
index 162d12f651d4a..88556040486e2 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
@@ -23,64 +23,42 @@ body: |
liveins: $vgpr1
; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs
- ; MUBUF: liveins: $vgpr1, $vgpr2
+ ; MUBUF: liveins: $sgpr38, $sgpr39, $vgpr1
; MUBUF-NEXT: {{ $}}
- ; MUBUF-NEXT: $sgpr4 = COPY $sgpr33
+ ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
- ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
- ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
- ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
+ ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34
; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
- ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
- ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; MUBUF-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; MUBUF-NEXT: $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec
+ ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; MUBUF-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; MUBUF-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
- ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
- ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4
+ ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
+ ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc
;
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
- ; FLATSCR: liveins: $vgpr1, $vgpr2
+ ; FLATSCR: liveins: $sgpr38, $sgpr39, $vgpr1
; FLATSCR-NEXT: {{ $}}
- ; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33
+ ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
- ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
- ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
- ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
- ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
+ ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34
; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
- ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
- ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -16384, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
+ ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec
+ ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
+ ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; FLATSCR-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
- ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
- ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
- ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4
+ ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
+ ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
index a4f936a4d705c..1242e23db6c6a 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
@@ -22,34 +22,22 @@ body: |
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei
- ; CHECK: liveins: $vgpr1, $vgpr2
+ ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr4 = COPY $sgpr33
+ ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
- ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
+ ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 4096, implicit-def $scc
- ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33
- ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -4096, implicit-def $scc
- ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
- ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 4096
+ ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
- ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc
- ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; CHECK-NEXT: $sgpr33 = COPY $sgpr4
+ ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
+ ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_OR_B32_e32 %stack.0, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a1197aeace86f..d5f3be9a515a2 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -10,20 +10,20 @@ declare i64 @_Z13get_global_idj(i32) #0
define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX8-LABEL: clmem_read_simplified:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -91,20 +91,20 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: clmem_read_simplified:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -161,12 +161,12 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: clmem_read_simplified:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -174,8 +174,8 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -342,20 +342,20 @@ entry:
define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX8-LABEL: clmem_read:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -469,20 +469,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
;
; GFX900-LABEL: clmem_read:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX900-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX900-NEXT: s_mov_b32 s38, -1
-; GFX900-NEXT: s_mov_b32 s39, 0xe00000
-; GFX900-NEXT: s_add_u32 s36, s36, s11
-; GFX900-NEXT: s_addc_u32 s37, s37, 0
+; GFX900-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX900-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX900-NEXT: s_mov_b32 s50, -1
+; GFX900-NEXT: s_mov_b32 s51, 0xe00000
+; GFX900-NEXT: s_add_u32 s48, s48, s11
+; GFX900-NEXT: s_addc_u32 s49, s49, 0
; GFX900-NEXT: s_getpc_b64 s[0:1]
; GFX900-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX900-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX900-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX900-NEXT: v_mov_b32_e32 v31, v0
; GFX900-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX900-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX900-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX900-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
@@ -586,12 +586,12 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: clmem_read:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -599,8 +599,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -698,20 +698,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
;
; GFX90A-LABEL: clmem_read:
; GFX90A: ; %bb.0: ; %entry
-; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX90A-NEXT: s_mov_b32 s38, -1
-; GFX90A-NEXT: s_mov_b32 s39, 0xe00000
-; GFX90A-NEXT: s_add_u32 s36, s36, s11
-; GFX90A-NEXT: s_addc_u32 s37, s37, 0
+; GFX90A-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s50, -1
+; GFX90A-NEXT: s_mov_b32 s51, 0xe00000
+; GFX90A-NEXT: s_add_u32 s48, s48, s11
+; GFX90A-NEXT: s_addc_u32 s49, s49, 0
; GFX90A-NEXT: s_getpc_b64 s[0:1]
; GFX90A-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
@@ -1030,20 +1030,20 @@ while.end: ; preds = %while.cond.loopexit
define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX8-LABEL: Address32:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1116,20 +1116,20 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: Address32:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1173,12 +1173,12 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: Address32:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -1186,8 +1186,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -1345,20 +1345,20 @@ entry:
define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
; GFX8-LABEL: Offset64:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1397,20 +1397,20 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: Offset64:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1446,12 +1446,12 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: Offset64:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -1459,8 +1459,8 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -1569,20 +1569,20 @@ entry:
define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
; GFX8-LABEL: p32Offset64:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1619,20 +1619,20 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: p32Offset64:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1664,12 +1664,12 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: p32Offset64:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
@@ -1677,8 +1677,8 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -1776,31 +1776,31 @@ entry:
define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX8-LABEL: DiffBase:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s42, -1
-; GFX8-NEXT: s_mov_b32 s43, 0xe80000
-; GFX8-NEXT: s_add_u32 s40, s40, s11
-; GFX8-NEXT: s_addc_u32 s41, s41, 0
+; GFX8-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s66, -1
+; GFX8-NEXT: s_mov_b32 s67, 0xe80000
+; GFX8-NEXT: s_add_u32 s64, s64, s11
+; GFX8-NEXT: s_addc_u32 s65, s65, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
-; GFX8-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24
+; GFX8-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[40:41]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX8-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, s37
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s36, v2
+; GFX8-NEXT: v_mov_b32_e32 v1, s49
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s48, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, s39
-; GFX8-NEXT: v_add_u32_e32 v12, vcc, s38, v2
+; GFX8-NEXT: v_mov_b32_e32 v3, s51
+; GFX8-NEXT: v_add_u32_e32 v12, vcc, s50, v2
; GFX8-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v0
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
@@ -1839,31 +1839,31 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
;
; GFX9-LABEL: DiffBase:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s42, -1
-; GFX9-NEXT: s_mov_b32 s43, 0xe00000
-; GFX9-NEXT: s_add_u32 s40, s40, s11
-; GFX9-NEXT: s_addc_u32 s41, s41, 0
+; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s66, -1
+; GFX9-NEXT: s_mov_b32 s67, 0xe00000
+; GFX9-NEXT: s_add_u32 s64, s64, s11
+; GFX9-NEXT: s_addc_u32 s65, s65, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX9-NEXT: v_and_b32_e32 v16, 0xffff8000, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s37
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s36, v16
+; GFX9-NEXT: v_mov_b32_e32 v0, s49
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s48, v16
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v0, s39
-; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s38, v16
+; GFX9-NEXT: v_mov_b32_e32 v0, s51
+; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s50, v16
; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v0, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
@@ -1893,35 +1893,35 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v15, v3, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37]
+; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: DiffBase:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s42, -1
-; GFX10-NEXT: s_mov_b32 s43, 0x31c16000
-; GFX10-NEXT: s_add_u32 s40, s40, s11
-; GFX10-NEXT: s_addc_u32 s41, s41, 0
+; GFX10-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s66, -1
+; GFX10-NEXT: s_mov_b32 s67, 0x31c16000
+; GFX10-NEXT: s_add_u32 s64, s64, s11
+; GFX10-NEXT: s_addc_u32 s65, s65, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX10-NEXT: v_mov_b32_e32 v31, v0
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX10-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24
+; GFX10-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[40:41]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[42:43]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX10-NEXT: v_and_b32_e32 v16, 0xffff8000, v0
-; GFX10-NEXT: v_add_co_u32 v8, s0, s36, v16
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s37, 0, s0
-; GFX10-NEXT: v_add_co_u32 v12, s0, s38, v16
-; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s39, 0, s0
+; GFX10-NEXT: v_add_co_u32 v8, s0, s48, v16
+; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s49, 0, s0
+; GFX10-NEXT: v_add_co_u32 v12, s0, s50, v16
+; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s51, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v8, 0x1800
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v12, 0x3000
@@ -1952,7 +1952,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v15, v3, vcc_lo
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37]
+; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: DiffBase:
@@ -1962,21 +1962,21 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX11-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX11-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24
+; GFX11-NEXT: s_load_b128 s[48:51], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v12, 0xffff8000, v0
-; GFX11-NEXT: v_add_co_u32 v2, s0, s36, v12
+; GFX11-NEXT: v_add_co_u32 v2, s0, s48, v12
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s37, 0, s0
-; GFX11-NEXT: v_add_co_u32 v8, s0, s38, v12
+; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s49, 0, s0
+; GFX11-NEXT: v_add_co_u32 v8, s0, s50, v12
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
-; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s39, 0, s0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s51, 0, s0
; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0x2000
; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, 0x2000, v8
@@ -2005,7 +2005,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: global_store_b64 v12, v[0:1], s[36:37]
+; GFX11-NEXT: global_store_b64 v12, v[0:1], s[48:49]
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %buffer2) {
entry:
@@ -2046,20 +2046,20 @@ entry:
define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX8-LABEL: ReverseOrder:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2127,20 +2127,20 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: ReverseOrder:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -2196,12 +2196,12 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: ReverseOrder:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
@@ -2209,8 +2209,8 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -2382,20 +2382,20 @@ entry:
define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buffer) {
; GFX8-LABEL: negativeoffset:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s38, -1
-; GFX8-NEXT: s_mov_b32 s39, 0xe80000
-; GFX8-NEXT: s_add_u32 s36, s36, s11
-; GFX8-NEXT: s_addc_u32 s37, s37, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2423,20 +2423,20 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
;
; GFX9-LABEL: negativeoffset:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s38, -1
-; GFX9-NEXT: s_mov_b32 s39, 0xe00000
-; GFX9-NEXT: s_add_u32 s36, s36, s11
-; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -2463,12 +2463,12 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
;
; GFX10-LABEL: negativeoffset:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s38, -1
-; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
-; GFX10-NEXT: s_add_u32 s36, s36, s11
-; GFX10-NEXT: s_addc_u32 s37, s37, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj at gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj at gotpcrel32@hi+12
@@ -2476,8 +2476,8 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index c6ee557d970cd..814674804df57 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -41,63 +41,91 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr36_sgpr37 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr38_sgpr39 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr40_sgpr41 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr60 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr42_sgpr43 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit $exec
- ; CHECK-NEXT: renamable $sgpr44_sgpr45 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr61 = S_MOV_B32 1083786240
+ ; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240
+ ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr44_sgpr45, implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr34_sgpr35, implicit-def dead $scc
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]]
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.17
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.5(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr65 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr66 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr67 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr92 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr93 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr94 = COPY renamable $sgpr60
- ; CHECK-NEXT: renamable $sgpr95 = COPY renamable $sgpr60
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr47 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr80 = COPY killed renamable $sgpr52
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79
+ ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr80
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr49 = COPY killed renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr50 = COPY killed renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr52 = COPY killed renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr61 = COPY killed renamable $sgpr80
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr80
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr80
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr64 = COPY killed renamable $sgpr80
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr65 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
; CHECK-NEXT: {{ $}}
@@ -126,111 +154,117 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.6(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 renamable $sgpr38_sgpr39, undef renamable $sgpr46_sgpr47, implicit-def dead $scc
- ; CHECK-NEXT: renamable $sgpr46_sgpr47 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr62_sgpr63, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr62_sgpr63 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr40_sgpr41, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
+ ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr12_sgpr13, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr48_sgpr49 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: renamable $sgpr50_sgpr51 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr66_sgpr67 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr14, 11, implicit-def $m0, implicit $m0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr64_sgpr65, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY renamable $sgpr60_sgpr61, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr84_sgpr85, implicit $exec
; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1)
- ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr18_sgpr19, implicit $exec
; CHECK-NEXT: dead renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec
- ; CHECK-NEXT: renamable $sgpr58 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr82 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr4_sgpr5
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr52_sgpr53
- ; CHECK-NEXT: renamable $sgpr54_sgpr55 = COPY killed renamable $sgpr6_sgpr7
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr54_sgpr55
- ; CHECK-NEXT: renamable $sgpr56_sgpr57 = COPY killed renamable $sgpr10_sgpr11
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr56_sgpr57
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr4_sgpr5
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr68_sgpr69
+ ; CHECK-NEXT: renamable $sgpr78_sgpr79 = COPY killed renamable $sgpr6_sgpr7
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr78_sgpr79
+ ; CHECK-NEXT: renamable $sgpr80_sgpr81 = COPY killed renamable $sgpr10_sgpr11
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr80_sgpr81
; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr14
; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15
- ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr8
+ ; CHECK-NEXT: renamable $sgpr84 = COPY killed renamable $sgpr8
; CHECK-NEXT: renamable $sgpr33 = COPY killed renamable $sgpr16
- ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr15
- ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr14
+ ; CHECK-NEXT: renamable $sgpr83 = COPY killed renamable $sgpr15
+ ; CHECK-NEXT: renamable $sgpr85 = COPY killed renamable $sgpr14
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY killed renamable $sgpr18_sgpr19
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr58_sgpr59
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
- ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr63
- ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr59
+ ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr36_sgpr37
+ ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85
+ ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr83
; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr33
- ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr52_sgpr53
- ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr54_sgpr55
- ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr62
- ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr56_sgpr57
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr68_sgpr69
+ ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr78_sgpr79
+ ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr80_sgpr81
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr50_sgpr51
+ ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr66_sgpr67
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.17
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.10:
; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.12
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.11:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.17(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.17
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.12:
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr46_sgpr47
+ ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr62_sgpr63
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.13:
; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr42_sgpr43, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.14
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.14:
; CHECK-NEXT: successors: %bb.15(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.15:
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.16:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 570ea4b7132aa..0d25bc97ff775 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -246,10 +246,10 @@ bb3:
; CHECK-LABEL: {{^}}spill_func:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
-; GCN: NumVgprs: 3
-; GCN-GCNTRACKERS: NumVgprs: 4
-; GCN: ScratchSize: 12
-; GCN-GCNTRACKERS: ScratchSize: 16
+; GCN: NumVgprs: 2
+; GCN-GCNTRACKERS: NumVgprs: 3
+; GCN: ScratchSize: 8
+; GCN-GCNTRACKERS: ScratchSize: 12
define void @spill_func(ptr addrspace(1) %arg) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
index 26a4a6743cffa..926ebba0ced21 100644
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -1903,19 +1903,14 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32>
; VI-LABEL: v_vselect_v16f16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; VI-NEXT: s_mov_b64 exec, s[4:5]
-; VI-NEXT: v_writelane_b32 v31, s30, 0
-; VI-NEXT: v_writelane_b32 v31, s31, 1
; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16
; VI-NEXT: v_cmp_eq_u32_e64 s[18:19], 0, v17
-; VI-NEXT: v_cmp_eq_u32_e64 s[30:31], 0, v29
+; VI-NEXT: v_cmp_eq_u32_e64 s[38:39], 0, v29
; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v6
; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v14
; VI-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v18
; VI-NEXT: v_cmp_eq_u32_e64 s[28:29], 0, v27
-; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[30:31]
+; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[38:39]
; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v5
; VI-NEXT: v_lshrrev_b32_e32 v18, 16, v13
; VI-NEXT: v_cmp_eq_u32_e64 s[20:21], 0, v19
@@ -1957,8 +1952,6 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32>
; VI-NEXT: v_cmp_eq_u32_e64 s[16:17], 0, v28
; VI-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[14:15]
; VI-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[16:17]
-; VI-NEXT: v_readlane_b32 s31, v31, 1
-; VI-NEXT: v_readlane_b32 s30, v31, 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
; VI-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
@@ -1976,10 +1969,6 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32>
; VI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; VI-NEXT: v_or_b32_sdwa v6, v6, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; VI-NEXT: s_mov_b64 exec, s[4:5]
-; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_vselect_v16f16:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
index 47810346c50b7..634d077e41d37 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
@@ -9,15 +9,15 @@ declare void @foo()
define amdgpu_kernel void @kernel() {
; GCN-LABEL: kernel:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; GCN-NEXT: s_mov_b32 s38, -1
+; GCN-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s50, -1
; GCN-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane
-; GCN-NEXT: s_mov_b32 s39, 0xe00000
+; GCN-NEXT: s_mov_b32 s51, 0xe00000
; GCN-NEXT: v_writelane_b32 v40, s4, 0
-; GCN-NEXT: s_add_u32 s36, s36, s11
+; GCN-NEXT: s_add_u32 s48, s48, s11
; GCN-NEXT: v_writelane_b32 v40, s5, 1
-; GCN-NEXT: s_addc_u32 s37, s37, 0
+; GCN-NEXT: s_addc_u32 s49, s49, 0
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
; GCN-NEXT: v_readlane_b32 s0, v40, 0
; GCN-NEXT: s_mov_b32 s13, s9
@@ -34,9 +34,9 @@ define amdgpu_kernel void @kernel() {
; GCN-NEXT: s_mov_b64 s[6:7], s[2:3]
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GCN-NEXT: s_mov_b64 s[0:1], s[48:49]
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
-; GCN-NEXT: s_mov_b64 s[2:3], s[38:39]
+; GCN-NEXT: s_mov_b64 s[2:3], s[50:51]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
index acc193a9393c1..e2d2a65c41b4f 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
@@ -15006,20 +15006,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -15033,20 +15025,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -15063,20 +15047,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -15090,20 +15066,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -15156,23 +15124,15 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -15183,20 +15143,12 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -15213,23 +15165,15 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -15240,20 +15184,12 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16120,20 +16056,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -16145,20 +16073,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16175,20 +16095,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -16200,20 +16112,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16881,20 +16785,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -16906,20 +16802,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16936,20 +16824,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -16961,20 +16841,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -20981,20 +20853,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -21008,20 +20872,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21038,20 +20894,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -21065,20 +20913,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21187,20 +21027,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -21214,20 +21046,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21244,20 +21068,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -21271,20 +21087,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21337,20 +21145,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -21364,20 +21164,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21394,20 +21186,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -21421,20 +21205,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21475,20 +21251,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -21502,20 +21270,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21532,20 +21292,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -21559,20 +21311,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21625,20 +21369,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -21652,20 +21388,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21682,20 +21410,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -21709,20 +21429,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22201,23 +21913,15 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -22228,20 +21932,12 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22258,23 +21954,15 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -22285,20 +21973,12 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22407,23 +22087,15 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -22434,20 +22106,12 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22464,23 +22128,15 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -22491,20 +22147,12 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22557,20 +22205,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -22584,20 +22224,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22614,20 +22246,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -22641,20 +22265,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22695,23 +22311,15 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -22722,20 +22330,12 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22752,23 +22352,15 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -22779,20 +22371,12 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22845,20 +22429,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -22872,20 +22448,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22902,20 +22470,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -22929,20 +22489,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -23859,20 +23411,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -23884,20 +23428,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -23914,20 +23450,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -23939,20 +23467,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -24164,23 +23684,15 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -24191,20 +23703,12 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -24221,23 +23725,15 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -24248,20 +23744,12 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25040,23 +24528,15 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -25067,20 +24547,12 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25097,23 +24569,15 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -25124,20 +24588,12 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25190,20 +24646,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -25217,20 +24665,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25247,20 +24687,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -25274,20 +24706,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25735,20 +25159,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -25760,20 +25176,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25790,20 +25198,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -25815,20 +25215,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -26152,23 +25544,15 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -26179,20 +25563,12 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -26209,23 +25585,15 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -26236,20 +25604,12 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27028,23 +26388,15 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -27055,20 +26407,12 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27085,23 +26429,15 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -27112,20 +26448,12 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27178,20 +26506,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -27205,20 +26525,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27235,20 +26547,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -27262,20 +26566,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27987,23 +27283,15 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -28014,20 +27302,12 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28044,23 +27324,15 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -28071,20 +27343,12 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28137,20 +27401,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -28164,20 +27420,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28194,20 +27442,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -28221,20 +27461,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29009,23 +28241,15 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -29036,20 +28260,12 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29066,23 +28282,15 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -29093,20 +28301,12 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29159,20 +28359,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
@@ -29186,20 +28378,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29216,20 +28400,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
@@ -29243,20 +28419,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -30055,23 +29223,15 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -30082,20 +29242,12 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -30112,23 +29264,15 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -30139,20 +29283,12 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -30205,20 +29341,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s38, 2
-; GFX900-NEXT: v_writelane_b32 v0, s39, 3
-; GFX900-NEXT: v_writelane_b32 v0, s40, 4
-; GFX900-NEXT: v_writelane_b32 v0, s41, 5
-; GFX900-NEXT: v_writelane_b32 v0, s42, 6
-; GFX900-NEXT: v_writelane_b32 v0, s43, 7
-; GFX900-NEXT: v_writelane_b32 v0, s44, 8
-; GFX900-NEXT: v_writelane_b32 v0, s45, 9
-; GFX900-NEXT: v_writelane_b32 v0, s46, 10
-; GFX900-NEXT: v_writelane_b32 v0, s47, 11
-; GFX900-NEXT: v_writelane_b32 v0, s48, 12
-; GFX900-NEXT: v_writelane_b32 v0, s49, 13
-; GFX900-NEXT: v_writelane_b32 v0, s50, 14
-; GFX900-NEXT: v_writelane_b32 v0, s51, 15
+; GFX900-NEXT: v_writelane_b32 v0, s46, 2
+; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s48, 4
+; GFX900-NEXT: v_writelane_b32 v0, s49, 5
+; GFX900-NEXT: v_writelane_b32 v0, s50, 6
+; GFX900-NEXT: v_writelane_b32 v0, s51, 7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[36:51]
; GFX900-NEXT: ;;#ASMEND
@@ -30232,20 +29360,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_readlane_b32 s51, v0, 15
-; GFX900-NEXT: v_readlane_b32 s50, v0, 14
-; GFX900-NEXT: v_readlane_b32 s49, v0, 13
-; GFX900-NEXT: v_readlane_b32 s48, v0, 12
-; GFX900-NEXT: v_readlane_b32 s47, v0, 11
-; GFX900-NEXT: v_readlane_b32 s46, v0, 10
-; GFX900-NEXT: v_readlane_b32 s45, v0, 9
-; GFX900-NEXT: v_readlane_b32 s44, v0, 8
-; GFX900-NEXT: v_readlane_b32 s43, v0, 7
-; GFX900-NEXT: v_readlane_b32 s42, v0, 6
-; GFX900-NEXT: v_readlane_b32 s41, v0, 5
-; GFX900-NEXT: v_readlane_b32 s40, v0, 4
-; GFX900-NEXT: v_readlane_b32 s39, v0, 3
-; GFX900-NEXT: v_readlane_b32 s38, v0, 2
+; GFX900-NEXT: v_readlane_b32 s51, v0, 7
+; GFX900-NEXT: v_readlane_b32 s50, v0, 6
+; GFX900-NEXT: v_readlane_b32 s49, v0, 5
+; GFX900-NEXT: v_readlane_b32 s48, v0, 4
+; GFX900-NEXT: v_readlane_b32 s47, v0, 3
+; GFX900-NEXT: v_readlane_b32 s46, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -30262,20 +29382,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s40, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s41, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s42, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s43, 7
-; GFX90A-NEXT: v_writelane_b32 v0, s44, 8
-; GFX90A-NEXT: v_writelane_b32 v0, s45, 9
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 10
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 11
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 12
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 13
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 14
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 15
+; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[36:51]
; GFX90A-NEXT: ;;#ASMEND
@@ -30289,20 +29401,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 15
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 14
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 13
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 12
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 11
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 10
-; GFX90A-NEXT: v_readlane_b32 s45, v0, 9
-; GFX90A-NEXT: v_readlane_b32 s44, v0, 8
-; GFX90A-NEXT: v_readlane_b32 s43, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s42, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s41, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s40, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index b57adfe7d9306..3447cd161c653 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -610,42 +610,42 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; FIJI-NEXT: v_writelane_b32 v40, s35, 3
; FIJI-NEXT: v_writelane_b32 v40, s36, 4
; FIJI-NEXT: v_writelane_b32 v40, s37, 5
-; FIJI-NEXT: v_writelane_b32 v40, s38, 6
-; FIJI-NEXT: v_writelane_b32 v40, s39, 7
-; FIJI-NEXT: v_writelane_b32 v40, s40, 8
-; FIJI-NEXT: v_writelane_b32 v40, s41, 9
-; FIJI-NEXT: v_writelane_b32 v40, s42, 10
-; FIJI-NEXT: v_writelane_b32 v40, s43, 11
-; FIJI-NEXT: v_writelane_b32 v40, s44, 12
-; FIJI-NEXT: v_writelane_b32 v40, s45, 13
-; FIJI-NEXT: v_writelane_b32 v40, s46, 14
-; FIJI-NEXT: v_writelane_b32 v40, s47, 15
-; FIJI-NEXT: v_writelane_b32 v40, s48, 16
-; FIJI-NEXT: s_mov_b32 s42, s15
-; FIJI-NEXT: s_mov_b32 s43, s14
-; FIJI-NEXT: s_mov_b32 s44, s13
-; FIJI-NEXT: s_mov_b32 s45, s12
+; FIJI-NEXT: v_writelane_b32 v40, s46, 6
+; FIJI-NEXT: v_writelane_b32 v40, s47, 7
+; FIJI-NEXT: v_writelane_b32 v40, s48, 8
+; FIJI-NEXT: v_writelane_b32 v40, s49, 9
+; FIJI-NEXT: v_writelane_b32 v40, s50, 10
+; FIJI-NEXT: v_writelane_b32 v40, s51, 11
+; FIJI-NEXT: v_writelane_b32 v40, s52, 12
+; FIJI-NEXT: v_writelane_b32 v40, s53, 13
+; FIJI-NEXT: v_writelane_b32 v40, s62, 14
+; FIJI-NEXT: v_writelane_b32 v40, s63, 15
+; FIJI-NEXT: v_writelane_b32 v40, s64, 16
+; FIJI-NEXT: s_mov_b32 s50, s15
+; FIJI-NEXT: s_mov_b32 s51, s14
+; FIJI-NEXT: s_mov_b32 s52, s13
+; FIJI-NEXT: s_mov_b32 s53, s12
; FIJI-NEXT: s_mov_b64 s[34:35], s[10:11]
; FIJI-NEXT: s_mov_b64 s[36:37], s[8:9]
-; FIJI-NEXT: s_mov_b64 s[38:39], s[6:7]
-; FIJI-NEXT: s_mov_b64 s[40:41], s[4:5]
+; FIJI-NEXT: s_mov_b64 s[46:47], s[6:7]
+; FIJI-NEXT: s_mov_b64 s[48:49], s[4:5]
; FIJI-NEXT: v_add_u32_e32 v3, vcc, v3, v4
-; FIJI-NEXT: s_mov_b64 s[46:47], exec
+; FIJI-NEXT: s_mov_b64 s[62:63], exec
; FIJI-NEXT: s_addk_i32 s32, 0x400
-; FIJI-NEXT: v_writelane_b32 v40, s49, 17
+; FIJI-NEXT: v_writelane_b32 v40, s65, 17
; FIJI-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1
; FIJI-NEXT: v_readfirstlane_b32 s16, v0
; FIJI-NEXT: v_readfirstlane_b32 s17, v1
; FIJI-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; FIJI-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; FIJI-NEXT: s_mov_b64 s[4:5], s[40:41]
-; FIJI-NEXT: s_mov_b64 s[6:7], s[38:39]
+; FIJI-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; FIJI-NEXT: s_mov_b64 s[4:5], s[48:49]
+; FIJI-NEXT: s_mov_b64 s[6:7], s[46:47]
; FIJI-NEXT: s_mov_b64 s[8:9], s[36:37]
; FIJI-NEXT: s_mov_b64 s[10:11], s[34:35]
-; FIJI-NEXT: s_mov_b32 s12, s45
-; FIJI-NEXT: s_mov_b32 s13, s44
-; FIJI-NEXT: s_mov_b32 s14, s43
-; FIJI-NEXT: s_mov_b32 s15, s42
+; FIJI-NEXT: s_mov_b32 s12, s53
+; FIJI-NEXT: s_mov_b32 s13, s52
+; FIJI-NEXT: s_mov_b32 s14, s51
+; FIJI-NEXT: s_mov_b32 s15, s50
; FIJI-NEXT: v_mov_b32_e32 v0, v2
; FIJI-NEXT: v_mov_b32_e32 v1, v3
; FIJI-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -654,23 +654,23 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; FIJI-NEXT: ; implicit-def: $vgpr31
; FIJI-NEXT: ; implicit-def: $vgpr2
; FIJI-NEXT: ; implicit-def: $vgpr3
-; FIJI-NEXT: s_xor_b64 exec, exec, s[48:49]
+; FIJI-NEXT: s_xor_b64 exec, exec, s[64:65]
; FIJI-NEXT: s_cbranch_execnz .LBB18_1
; FIJI-NEXT: ; %bb.2:
-; FIJI-NEXT: s_mov_b64 exec, s[46:47]
+; FIJI-NEXT: s_mov_b64 exec, s[62:63]
; FIJI-NEXT: v_mov_b32_e32 v0, v4
-; FIJI-NEXT: v_readlane_b32 s49, v40, 17
-; FIJI-NEXT: v_readlane_b32 s48, v40, 16
-; FIJI-NEXT: v_readlane_b32 s47, v40, 15
-; FIJI-NEXT: v_readlane_b32 s46, v40, 14
-; FIJI-NEXT: v_readlane_b32 s45, v40, 13
-; FIJI-NEXT: v_readlane_b32 s44, v40, 12
-; FIJI-NEXT: v_readlane_b32 s43, v40, 11
-; FIJI-NEXT: v_readlane_b32 s42, v40, 10
-; FIJI-NEXT: v_readlane_b32 s41, v40, 9
-; FIJI-NEXT: v_readlane_b32 s40, v40, 8
-; FIJI-NEXT: v_readlane_b32 s39, v40, 7
-; FIJI-NEXT: v_readlane_b32 s38, v40, 6
+; FIJI-NEXT: v_readlane_b32 s65, v40, 17
+; FIJI-NEXT: v_readlane_b32 s64, v40, 16
+; FIJI-NEXT: v_readlane_b32 s63, v40, 15
+; FIJI-NEXT: v_readlane_b32 s62, v40, 14
+; FIJI-NEXT: v_readlane_b32 s53, v40, 13
+; FIJI-NEXT: v_readlane_b32 s52, v40, 12
+; FIJI-NEXT: v_readlane_b32 s51, v40, 11
+; FIJI-NEXT: v_readlane_b32 s50, v40, 10
+; FIJI-NEXT: v_readlane_b32 s49, v40, 9
+; FIJI-NEXT: v_readlane_b32 s48, v40, 8
+; FIJI-NEXT: v_readlane_b32 s47, v40, 7
+; FIJI-NEXT: v_readlane_b32 s46, v40, 6
; FIJI-NEXT: v_readlane_b32 s37, v40, 5
; FIJI-NEXT: v_readlane_b32 s36, v40, 4
; FIJI-NEXT: v_readlane_b32 s35, v40, 3
@@ -701,42 +701,42 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; HAWAII-NEXT: v_writelane_b32 v40, s35, 3
; HAWAII-NEXT: v_writelane_b32 v40, s36, 4
; HAWAII-NEXT: v_writelane_b32 v40, s37, 5
-; HAWAII-NEXT: v_writelane_b32 v40, s38, 6
-; HAWAII-NEXT: v_writelane_b32 v40, s39, 7
-; HAWAII-NEXT: v_writelane_b32 v40, s40, 8
-; HAWAII-NEXT: v_writelane_b32 v40, s41, 9
-; HAWAII-NEXT: v_writelane_b32 v40, s42, 10
-; HAWAII-NEXT: v_writelane_b32 v40, s43, 11
-; HAWAII-NEXT: v_writelane_b32 v40, s44, 12
-; HAWAII-NEXT: v_writelane_b32 v40, s45, 13
-; HAWAII-NEXT: v_writelane_b32 v40, s46, 14
-; HAWAII-NEXT: v_writelane_b32 v40, s47, 15
-; HAWAII-NEXT: v_writelane_b32 v40, s48, 16
-; HAWAII-NEXT: s_mov_b32 s42, s15
-; HAWAII-NEXT: s_mov_b32 s43, s14
-; HAWAII-NEXT: s_mov_b32 s44, s13
-; HAWAII-NEXT: s_mov_b32 s45, s12
+; HAWAII-NEXT: v_writelane_b32 v40, s46, 6
+; HAWAII-NEXT: v_writelane_b32 v40, s47, 7
+; HAWAII-NEXT: v_writelane_b32 v40, s48, 8
+; HAWAII-NEXT: v_writelane_b32 v40, s49, 9
+; HAWAII-NEXT: v_writelane_b32 v40, s50, 10
+; HAWAII-NEXT: v_writelane_b32 v40, s51, 11
+; HAWAII-NEXT: v_writelane_b32 v40, s52, 12
+; HAWAII-NEXT: v_writelane_b32 v40, s53, 13
+; HAWAII-NEXT: v_writelane_b32 v40, s62, 14
+; HAWAII-NEXT: v_writelane_b32 v40, s63, 15
+; HAWAII-NEXT: v_writelane_b32 v40, s64, 16
+; HAWAII-NEXT: s_mov_b32 s50, s15
+; HAWAII-NEXT: s_mov_b32 s51, s14
+; HAWAII-NEXT: s_mov_b32 s52, s13
+; HAWAII-NEXT: s_mov_b32 s53, s12
; HAWAII-NEXT: s_mov_b64 s[34:35], s[10:11]
; HAWAII-NEXT: s_mov_b64 s[36:37], s[8:9]
-; HAWAII-NEXT: s_mov_b64 s[38:39], s[6:7]
-; HAWAII-NEXT: s_mov_b64 s[40:41], s[4:5]
+; HAWAII-NEXT: s_mov_b64 s[46:47], s[6:7]
+; HAWAII-NEXT: s_mov_b64 s[48:49], s[4:5]
; HAWAII-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; HAWAII-NEXT: s_mov_b64 s[46:47], exec
+; HAWAII-NEXT: s_mov_b64 s[62:63], exec
; HAWAII-NEXT: s_addk_i32 s32, 0x400
-; HAWAII-NEXT: v_writelane_b32 v40, s49, 17
+; HAWAII-NEXT: v_writelane_b32 v40, s65, 17
; HAWAII-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1
; HAWAII-NEXT: v_readfirstlane_b32 s16, v0
; HAWAII-NEXT: v_readfirstlane_b32 s17, v1
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; HAWAII-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; HAWAII-NEXT: s_mov_b64 s[4:5], s[40:41]
-; HAWAII-NEXT: s_mov_b64 s[6:7], s[38:39]
+; HAWAII-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; HAWAII-NEXT: s_mov_b64 s[4:5], s[48:49]
+; HAWAII-NEXT: s_mov_b64 s[6:7], s[46:47]
; HAWAII-NEXT: s_mov_b64 s[8:9], s[36:37]
; HAWAII-NEXT: s_mov_b64 s[10:11], s[34:35]
-; HAWAII-NEXT: s_mov_b32 s12, s45
-; HAWAII-NEXT: s_mov_b32 s13, s44
-; HAWAII-NEXT: s_mov_b32 s14, s43
-; HAWAII-NEXT: s_mov_b32 s15, s42
+; HAWAII-NEXT: s_mov_b32 s12, s53
+; HAWAII-NEXT: s_mov_b32 s13, s52
+; HAWAII-NEXT: s_mov_b32 s14, s51
+; HAWAII-NEXT: s_mov_b32 s15, s50
; HAWAII-NEXT: v_mov_b32_e32 v0, v2
; HAWAII-NEXT: v_mov_b32_e32 v1, v3
; HAWAII-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -745,23 +745,23 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; HAWAII-NEXT: ; implicit-def: $vgpr31
; HAWAII-NEXT: ; implicit-def: $vgpr2
; HAWAII-NEXT: ; implicit-def: $vgpr3
-; HAWAII-NEXT: s_xor_b64 exec, exec, s[48:49]
+; HAWAII-NEXT: s_xor_b64 exec, exec, s[64:65]
; HAWAII-NEXT: s_cbranch_execnz .LBB18_1
; HAWAII-NEXT: ; %bb.2:
-; HAWAII-NEXT: s_mov_b64 exec, s[46:47]
+; HAWAII-NEXT: s_mov_b64 exec, s[62:63]
; HAWAII-NEXT: v_mov_b32_e32 v0, v4
-; HAWAII-NEXT: v_readlane_b32 s49, v40, 17
-; HAWAII-NEXT: v_readlane_b32 s48, v40, 16
-; HAWAII-NEXT: v_readlane_b32 s47, v40, 15
-; HAWAII-NEXT: v_readlane_b32 s46, v40, 14
-; HAWAII-NEXT: v_readlane_b32 s45, v40, 13
-; HAWAII-NEXT: v_readlane_b32 s44, v40, 12
-; HAWAII-NEXT: v_readlane_b32 s43, v40, 11
-; HAWAII-NEXT: v_readlane_b32 s42, v40, 10
-; HAWAII-NEXT: v_readlane_b32 s41, v40, 9
-; HAWAII-NEXT: v_readlane_b32 s40, v40, 8
-; HAWAII-NEXT: v_readlane_b32 s39, v40, 7
-; HAWAII-NEXT: v_readlane_b32 s38, v40, 6
+; HAWAII-NEXT: v_readlane_b32 s65, v40, 17
+; HAWAII-NEXT: v_readlane_b32 s64, v40, 16
+; HAWAII-NEXT: v_readlane_b32 s63, v40, 15
+; HAWAII-NEXT: v_readlane_b32 s62, v40, 14
+; HAWAII-NEXT: v_readlane_b32 s53, v40, 13
+; HAWAII-NEXT: v_readlane_b32 s52, v40, 12
+; HAWAII-NEXT: v_readlane_b32 s51, v40, 11
+; HAWAII-NEXT: v_readlane_b32 s50, v40, 10
+; HAWAII-NEXT: v_readlane_b32 s49, v40, 9
+; HAWAII-NEXT: v_readlane_b32 s48, v40, 8
+; HAWAII-NEXT: v_readlane_b32 s47, v40, 7
+; HAWAII-NEXT: v_readlane_b32 s46, v40, 6
; HAWAII-NEXT: v_readlane_b32 s37, v40, 5
; HAWAII-NEXT: v_readlane_b32 s36, v40, 4
; HAWAII-NEXT: v_readlane_b32 s35, v40, 3
@@ -792,42 +792,42 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; GFX9-NEXT: v_writelane_b32 v40, s35, 3
; GFX9-NEXT: v_writelane_b32 v40, s36, 4
; GFX9-NEXT: v_writelane_b32 v40, s37, 5
-; GFX9-NEXT: v_writelane_b32 v40, s38, 6
-; GFX9-NEXT: v_writelane_b32 v40, s39, 7
-; GFX9-NEXT: v_writelane_b32 v40, s40, 8
-; GFX9-NEXT: v_writelane_b32 v40, s41, 9
-; GFX9-NEXT: v_writelane_b32 v40, s42, 10
-; GFX9-NEXT: v_writelane_b32 v40, s43, 11
-; GFX9-NEXT: v_writelane_b32 v40, s44, 12
-; GFX9-NEXT: v_writelane_b32 v40, s45, 13
-; GFX9-NEXT: v_writelane_b32 v40, s46, 14
-; GFX9-NEXT: v_writelane_b32 v40, s47, 15
-; GFX9-NEXT: v_writelane_b32 v40, s48, 16
-; GFX9-NEXT: s_mov_b32 s42, s15
-; GFX9-NEXT: s_mov_b32 s43, s14
-; GFX9-NEXT: s_mov_b32 s44, s13
-; GFX9-NEXT: s_mov_b32 s45, s12
+; GFX9-NEXT: v_writelane_b32 v40, s46, 6
+; GFX9-NEXT: v_writelane_b32 v40, s47, 7
+; GFX9-NEXT: v_writelane_b32 v40, s48, 8
+; GFX9-NEXT: v_writelane_b32 v40, s49, 9
+; GFX9-NEXT: v_writelane_b32 v40, s50, 10
+; GFX9-NEXT: v_writelane_b32 v40, s51, 11
+; GFX9-NEXT: v_writelane_b32 v40, s52, 12
+; GFX9-NEXT: v_writelane_b32 v40, s53, 13
+; GFX9-NEXT: v_writelane_b32 v40, s62, 14
+; GFX9-NEXT: v_writelane_b32 v40, s63, 15
+; GFX9-NEXT: v_writelane_b32 v40, s64, 16
+; GFX9-NEXT: s_mov_b32 s50, s15
+; GFX9-NEXT: s_mov_b32 s51, s14
+; GFX9-NEXT: s_mov_b32 s52, s13
+; GFX9-NEXT: s_mov_b32 s53, s12
; GFX9-NEXT: s_mov_b64 s[34:35], s[10:11]
; GFX9-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GFX9-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GFX9-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5]
; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
-; GFX9-NEXT: s_mov_b64 s[46:47], exec
+; GFX9-NEXT: s_mov_b64 s[62:63], exec
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s49, 17
+; GFX9-NEXT: v_writelane_b32 v40, s65, 17
; GFX9-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_readfirstlane_b32 s16, v0
; GFX9-NEXT: v_readfirstlane_b32 s17, v1
; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GFX9-NEXT: s_and_saveexec_b64 s[48:49], vcc
-; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GFX9-NEXT: s_and_saveexec_b64 s[64:65], vcc
+; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GFX9-NEXT: s_mov_b32 s12, s45
-; GFX9-NEXT: s_mov_b32 s13, s44
-; GFX9-NEXT: s_mov_b32 s14, s43
-; GFX9-NEXT: s_mov_b32 s15, s42
+; GFX9-NEXT: s_mov_b32 s12, s53
+; GFX9-NEXT: s_mov_b32 s13, s52
+; GFX9-NEXT: s_mov_b32 s14, s51
+; GFX9-NEXT: s_mov_b32 s15, s50
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: v_mov_b32_e32 v1, v3
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -836,23 +836,23 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; GFX9-NEXT: ; implicit-def: $vgpr31
; GFX9-NEXT: ; implicit-def: $vgpr2
; GFX9-NEXT: ; implicit-def: $vgpr3
-; GFX9-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GFX9-NEXT: s_xor_b64 exec, exec, s[64:65]
; GFX9-NEXT: s_cbranch_execnz .LBB18_1
; GFX9-NEXT: ; %bb.2:
-; GFX9-NEXT: s_mov_b64 exec, s[46:47]
+; GFX9-NEXT: s_mov_b64 exec, s[62:63]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
-; GFX9-NEXT: v_readlane_b32 s49, v40, 17
-; GFX9-NEXT: v_readlane_b32 s48, v40, 16
-; GFX9-NEXT: v_readlane_b32 s47, v40, 15
-; GFX9-NEXT: v_readlane_b32 s46, v40, 14
-; GFX9-NEXT: v_readlane_b32 s45, v40, 13
-; GFX9-NEXT: v_readlane_b32 s44, v40, 12
-; GFX9-NEXT: v_readlane_b32 s43, v40, 11
-; GFX9-NEXT: v_readlane_b32 s42, v40, 10
-; GFX9-NEXT: v_readlane_b32 s41, v40, 9
-; GFX9-NEXT: v_readlane_b32 s40, v40, 8
-; GFX9-NEXT: v_readlane_b32 s39, v40, 7
-; GFX9-NEXT: v_readlane_b32 s38, v40, 6
+; GFX9-NEXT: v_readlane_b32 s65, v40, 17
+; GFX9-NEXT: v_readlane_b32 s64, v40, 16
+; GFX9-NEXT: v_readlane_b32 s63, v40, 15
+; GFX9-NEXT: v_readlane_b32 s62, v40, 14
+; GFX9-NEXT: v_readlane_b32 s53, v40, 13
+; GFX9-NEXT: v_readlane_b32 s52, v40, 12
+; GFX9-NEXT: v_readlane_b32 s51, v40, 11
+; GFX9-NEXT: v_readlane_b32 s50, v40, 10
+; GFX9-NEXT: v_readlane_b32 s49, v40, 9
+; GFX9-NEXT: v_readlane_b32 s48, v40, 8
+; GFX9-NEXT: v_readlane_b32 s47, v40, 7
+; GFX9-NEXT: v_readlane_b32 s46, v40, 6
; GFX9-NEXT: v_readlane_b32 s37, v40, 5
; GFX9-NEXT: v_readlane_b32 s36, v40, 4
; GFX9-NEXT: v_readlane_b32 s35, v40, 3
diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
index 080bd052a7391..6b5c624356f47 100644
--- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
@@ -36,52 +36,73 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9
+ ; CHECK-NEXT: renamable $sgpr49 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY undef $sgpr8_sgpr9
; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
- ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
- ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr44_sgpr45, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
+ ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr46_sgpr47, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+ ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr51
+ ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY renamable $sgpr14_sgpr15
+ ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr13
+ ; CHECK-NEXT: renamable $vgpr23 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr23, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr23
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, killed $vgpr23
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, killed $vgpr23
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, killed $vgpr23
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, killed $vgpr23
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, killed $vgpr23
+ ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, killed $vgpr23, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr23, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; CHECK-NEXT: $vgpr1 = COPY killed renamable $sgpr15
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $vcc = COPY renamable $sgpr40_sgpr41
+ ; CHECK-NEXT: $vcc = COPY renamable $sgpr48_sgpr49
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
+ ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
+ ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
+ ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
+ ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
+ ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+ ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr46_sgpr47, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr49
+ ; CHECK-NEXT: renamable $vgpr23 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 1
+ ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 2
+ ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 3
+ ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 4
+ ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 5
+ ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 6
+ ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR killed $vgpr23, 7
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr10_sgpr11, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37
; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index dff2bd7f7aef9..adaef348a0388 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -53,7 +53,7 @@ body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr10, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-LABEL: name: sgpr_spill_lane_crossover
- ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63
@@ -61,32 +61,16 @@ body: |
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr72, 8, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr73, 9, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr74, 10, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr75, 11, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr76, 12, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr77, 13, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 14, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 15, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 16, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 17, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 18, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 19, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 20, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 21, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 22, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 23, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr88, 24, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr89, 25, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr90, 26, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr91, 27, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr92, 28, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr93, 29, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 6, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 7, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 8, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 9, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 10, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 11, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 12, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 13, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 14, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 15, $vgpr63
; GCN-NEXT: S_NOP 0
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
index 9b0f52cb39b01..fcd835c7f09da 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
@@ -56,20 +56,15 @@ body: |
bb.0:
liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0
; GCN-LABEL: name: spill_exec_copy_reserved_reg
- ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr2, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr28_sgpr29 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr28_sgpr29
- ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2
- ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr35, 1, undef $vgpr2
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0
- ; GCN-NEXT: $sgpr34_sgpr35 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $sgpr38_sgpr39 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $sgpr28_sgpr29 = IMPLICIT_DEF
; GCN-NEXT: $vgpr1 = COPY $vgpr0
; GCN-NEXT: S_NOP 0, implicit $sgpr28_sgpr29
- ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr34_sgpr35
+ ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr38_sgpr39
; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8_sgpr9_sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr15, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
index 0d6bccad89d82..f7ea8109beea4 100644
--- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
@@ -7,149 +7,83 @@ define void @spill_more_than_wavesize_csr_sgprs() {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: v_writelane_b32 v0, s35, 0
; CHECK-NEXT: v_writelane_b32 v0, s36, 1
; CHECK-NEXT: v_writelane_b32 v0, s37, 2
-; CHECK-NEXT: v_writelane_b32 v0, s38, 3
-; CHECK-NEXT: v_writelane_b32 v0, s39, 4
-; CHECK-NEXT: v_writelane_b32 v0, s40, 5
-; CHECK-NEXT: v_writelane_b32 v0, s41, 6
-; CHECK-NEXT: v_writelane_b32 v0, s42, 7
-; CHECK-NEXT: v_writelane_b32 v0, s43, 8
-; CHECK-NEXT: v_writelane_b32 v0, s44, 9
-; CHECK-NEXT: v_writelane_b32 v0, s45, 10
-; CHECK-NEXT: v_writelane_b32 v0, s46, 11
-; CHECK-NEXT: v_writelane_b32 v0, s47, 12
-; CHECK-NEXT: v_writelane_b32 v0, s48, 13
-; CHECK-NEXT: v_writelane_b32 v0, s49, 14
-; CHECK-NEXT: v_writelane_b32 v0, s50, 15
-; CHECK-NEXT: v_writelane_b32 v0, s51, 16
-; CHECK-NEXT: v_writelane_b32 v0, s52, 17
-; CHECK-NEXT: v_writelane_b32 v0, s53, 18
-; CHECK-NEXT: v_writelane_b32 v0, s54, 19
-; CHECK-NEXT: v_writelane_b32 v0, s55, 20
-; CHECK-NEXT: v_writelane_b32 v0, s56, 21
-; CHECK-NEXT: v_writelane_b32 v0, s57, 22
-; CHECK-NEXT: v_writelane_b32 v0, s58, 23
-; CHECK-NEXT: v_writelane_b32 v0, s59, 24
-; CHECK-NEXT: v_writelane_b32 v0, s60, 25
-; CHECK-NEXT: v_writelane_b32 v0, s61, 26
-; CHECK-NEXT: v_writelane_b32 v0, s62, 27
-; CHECK-NEXT: v_writelane_b32 v0, s63, 28
-; CHECK-NEXT: v_writelane_b32 v0, s64, 29
-; CHECK-NEXT: v_writelane_b32 v0, s65, 30
-; CHECK-NEXT: v_writelane_b32 v0, s66, 31
-; CHECK-NEXT: v_writelane_b32 v0, s67, 32
-; CHECK-NEXT: v_writelane_b32 v0, s68, 33
-; CHECK-NEXT: v_writelane_b32 v0, s69, 34
-; CHECK-NEXT: v_writelane_b32 v0, s70, 35
-; CHECK-NEXT: v_writelane_b32 v0, s71, 36
-; CHECK-NEXT: v_writelane_b32 v0, s72, 37
-; CHECK-NEXT: v_writelane_b32 v0, s73, 38
-; CHECK-NEXT: v_writelane_b32 v0, s74, 39
-; CHECK-NEXT: v_writelane_b32 v0, s75, 40
-; CHECK-NEXT: v_writelane_b32 v0, s76, 41
-; CHECK-NEXT: v_writelane_b32 v0, s77, 42
-; CHECK-NEXT: v_writelane_b32 v0, s78, 43
-; CHECK-NEXT: v_writelane_b32 v0, s79, 44
-; CHECK-NEXT: v_writelane_b32 v0, s80, 45
-; CHECK-NEXT: v_writelane_b32 v0, s81, 46
-; CHECK-NEXT: v_writelane_b32 v0, s82, 47
-; CHECK-NEXT: v_writelane_b32 v0, s83, 48
-; CHECK-NEXT: v_writelane_b32 v0, s84, 49
-; CHECK-NEXT: v_writelane_b32 v0, s85, 50
-; CHECK-NEXT: v_writelane_b32 v0, s86, 51
-; CHECK-NEXT: v_writelane_b32 v0, s87, 52
-; CHECK-NEXT: v_writelane_b32 v0, s88, 53
-; CHECK-NEXT: v_writelane_b32 v0, s89, 54
-; CHECK-NEXT: v_writelane_b32 v0, s90, 55
-; CHECK-NEXT: v_writelane_b32 v0, s91, 56
-; CHECK-NEXT: v_writelane_b32 v0, s92, 57
-; CHECK-NEXT: v_writelane_b32 v0, s93, 58
-; CHECK-NEXT: v_writelane_b32 v0, s94, 59
-; CHECK-NEXT: v_writelane_b32 v0, s95, 60
-; CHECK-NEXT: v_writelane_b32 v1, s99, 0
-; CHECK-NEXT: v_writelane_b32 v0, s96, 61
-; CHECK-NEXT: v_writelane_b32 v1, s100, 1
-; CHECK-NEXT: v_writelane_b32 v0, s97, 62
-; CHECK-NEXT: v_writelane_b32 v1, s101, 2
-; CHECK-NEXT: v_writelane_b32 v0, s98, 63
-; CHECK-NEXT: v_writelane_b32 v1, s102, 3
+; CHECK-NEXT: v_writelane_b32 v0, s46, 3
+; CHECK-NEXT: v_writelane_b32 v0, s47, 4
+; CHECK-NEXT: v_writelane_b32 v0, s48, 5
+; CHECK-NEXT: v_writelane_b32 v0, s49, 6
+; CHECK-NEXT: v_writelane_b32 v0, s50, 7
+; CHECK-NEXT: v_writelane_b32 v0, s51, 8
+; CHECK-NEXT: v_writelane_b32 v0, s52, 9
+; CHECK-NEXT: v_writelane_b32 v0, s53, 10
+; CHECK-NEXT: v_writelane_b32 v0, s62, 11
+; CHECK-NEXT: v_writelane_b32 v0, s63, 12
+; CHECK-NEXT: v_writelane_b32 v0, s64, 13
+; CHECK-NEXT: v_writelane_b32 v0, s65, 14
+; CHECK-NEXT: v_writelane_b32 v0, s66, 15
+; CHECK-NEXT: v_writelane_b32 v0, s67, 16
+; CHECK-NEXT: v_writelane_b32 v0, s68, 17
+; CHECK-NEXT: v_writelane_b32 v0, s69, 18
+; CHECK-NEXT: v_writelane_b32 v0, s78, 19
+; CHECK-NEXT: v_writelane_b32 v0, s79, 20
+; CHECK-NEXT: v_writelane_b32 v0, s80, 21
+; CHECK-NEXT: v_writelane_b32 v0, s81, 22
+; CHECK-NEXT: v_writelane_b32 v0, s82, 23
+; CHECK-NEXT: v_writelane_b32 v0, s83, 24
+; CHECK-NEXT: v_writelane_b32 v0, s84, 25
+; CHECK-NEXT: v_writelane_b32 v0, s85, 26
+; CHECK-NEXT: v_writelane_b32 v0, s94, 27
+; CHECK-NEXT: v_writelane_b32 v0, s95, 28
+; CHECK-NEXT: v_writelane_b32 v0, s96, 29
+; CHECK-NEXT: v_writelane_b32 v0, s97, 30
+; CHECK-NEXT: v_writelane_b32 v0, s98, 31
+; CHECK-NEXT: v_writelane_b32 v0, s99, 32
+; CHECK-NEXT: v_writelane_b32 v0, s100, 33
+; CHECK-NEXT: v_writelane_b32 v0, s101, 34
+; CHECK-NEXT: v_writelane_b32 v0, s102, 35
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s102, v1, 3
-; CHECK-NEXT: v_readlane_b32 s101, v1, 2
-; CHECK-NEXT: v_readlane_b32 s100, v1, 1
-; CHECK-NEXT: v_readlane_b32 s99, v1, 0
-; CHECK-NEXT: v_readlane_b32 s98, v0, 63
-; CHECK-NEXT: v_readlane_b32 s97, v0, 62
-; CHECK-NEXT: v_readlane_b32 s96, v0, 61
-; CHECK-NEXT: v_readlane_b32 s95, v0, 60
-; CHECK-NEXT: v_readlane_b32 s94, v0, 59
-; CHECK-NEXT: v_readlane_b32 s93, v0, 58
-; CHECK-NEXT: v_readlane_b32 s92, v0, 57
-; CHECK-NEXT: v_readlane_b32 s91, v0, 56
-; CHECK-NEXT: v_readlane_b32 s90, v0, 55
-; CHECK-NEXT: v_readlane_b32 s89, v0, 54
-; CHECK-NEXT: v_readlane_b32 s88, v0, 53
-; CHECK-NEXT: v_readlane_b32 s87, v0, 52
-; CHECK-NEXT: v_readlane_b32 s86, v0, 51
-; CHECK-NEXT: v_readlane_b32 s85, v0, 50
-; CHECK-NEXT: v_readlane_b32 s84, v0, 49
-; CHECK-NEXT: v_readlane_b32 s83, v0, 48
-; CHECK-NEXT: v_readlane_b32 s82, v0, 47
-; CHECK-NEXT: v_readlane_b32 s81, v0, 46
-; CHECK-NEXT: v_readlane_b32 s80, v0, 45
-; CHECK-NEXT: v_readlane_b32 s79, v0, 44
-; CHECK-NEXT: v_readlane_b32 s78, v0, 43
-; CHECK-NEXT: v_readlane_b32 s77, v0, 42
-; CHECK-NEXT: v_readlane_b32 s76, v0, 41
-; CHECK-NEXT: v_readlane_b32 s75, v0, 40
-; CHECK-NEXT: v_readlane_b32 s74, v0, 39
-; CHECK-NEXT: v_readlane_b32 s73, v0, 38
-; CHECK-NEXT: v_readlane_b32 s72, v0, 37
-; CHECK-NEXT: v_readlane_b32 s71, v0, 36
-; CHECK-NEXT: v_readlane_b32 s70, v0, 35
-; CHECK-NEXT: v_readlane_b32 s69, v0, 34
-; CHECK-NEXT: v_readlane_b32 s68, v0, 33
-; CHECK-NEXT: v_readlane_b32 s67, v0, 32
-; CHECK-NEXT: v_readlane_b32 s66, v0, 31
-; CHECK-NEXT: v_readlane_b32 s65, v0, 30
-; CHECK-NEXT: v_readlane_b32 s64, v0, 29
-; CHECK-NEXT: v_readlane_b32 s63, v0, 28
-; CHECK-NEXT: v_readlane_b32 s62, v0, 27
-; CHECK-NEXT: v_readlane_b32 s61, v0, 26
-; CHECK-NEXT: v_readlane_b32 s60, v0, 25
-; CHECK-NEXT: v_readlane_b32 s59, v0, 24
-; CHECK-NEXT: v_readlane_b32 s58, v0, 23
-; CHECK-NEXT: v_readlane_b32 s57, v0, 22
-; CHECK-NEXT: v_readlane_b32 s56, v0, 21
-; CHECK-NEXT: v_readlane_b32 s55, v0, 20
-; CHECK-NEXT: v_readlane_b32 s54, v0, 19
-; CHECK-NEXT: v_readlane_b32 s53, v0, 18
-; CHECK-NEXT: v_readlane_b32 s52, v0, 17
-; CHECK-NEXT: v_readlane_b32 s51, v0, 16
-; CHECK-NEXT: v_readlane_b32 s50, v0, 15
-; CHECK-NEXT: v_readlane_b32 s49, v0, 14
-; CHECK-NEXT: v_readlane_b32 s48, v0, 13
-; CHECK-NEXT: v_readlane_b32 s47, v0, 12
-; CHECK-NEXT: v_readlane_b32 s46, v0, 11
-; CHECK-NEXT: v_readlane_b32 s45, v0, 10
-; CHECK-NEXT: v_readlane_b32 s44, v0, 9
-; CHECK-NEXT: v_readlane_b32 s43, v0, 8
-; CHECK-NEXT: v_readlane_b32 s42, v0, 7
-; CHECK-NEXT: v_readlane_b32 s41, v0, 6
-; CHECK-NEXT: v_readlane_b32 s40, v0, 5
-; CHECK-NEXT: v_readlane_b32 s39, v0, 4
-; CHECK-NEXT: v_readlane_b32 s38, v0, 3
+; CHECK-NEXT: v_readlane_b32 s102, v0, 35
+; CHECK-NEXT: v_readlane_b32 s101, v0, 34
+; CHECK-NEXT: v_readlane_b32 s100, v0, 33
+; CHECK-NEXT: v_readlane_b32 s99, v0, 32
+; CHECK-NEXT: v_readlane_b32 s98, v0, 31
+; CHECK-NEXT: v_readlane_b32 s97, v0, 30
+; CHECK-NEXT: v_readlane_b32 s96, v0, 29
+; CHECK-NEXT: v_readlane_b32 s95, v0, 28
+; CHECK-NEXT: v_readlane_b32 s94, v0, 27
+; CHECK-NEXT: v_readlane_b32 s85, v0, 26
+; CHECK-NEXT: v_readlane_b32 s84, v0, 25
+; CHECK-NEXT: v_readlane_b32 s83, v0, 24
+; CHECK-NEXT: v_readlane_b32 s82, v0, 23
+; CHECK-NEXT: v_readlane_b32 s81, v0, 22
+; CHECK-NEXT: v_readlane_b32 s80, v0, 21
+; CHECK-NEXT: v_readlane_b32 s79, v0, 20
+; CHECK-NEXT: v_readlane_b32 s78, v0, 19
+; CHECK-NEXT: v_readlane_b32 s69, v0, 18
+; CHECK-NEXT: v_readlane_b32 s68, v0, 17
+; CHECK-NEXT: v_readlane_b32 s67, v0, 16
+; CHECK-NEXT: v_readlane_b32 s66, v0, 15
+; CHECK-NEXT: v_readlane_b32 s65, v0, 14
+; CHECK-NEXT: v_readlane_b32 s64, v0, 13
+; CHECK-NEXT: v_readlane_b32 s63, v0, 12
+; CHECK-NEXT: v_readlane_b32 s62, v0, 11
+; CHECK-NEXT: v_readlane_b32 s53, v0, 10
+; CHECK-NEXT: v_readlane_b32 s52, v0, 9
+; CHECK-NEXT: v_readlane_b32 s51, v0, 8
+; CHECK-NEXT: v_readlane_b32 s50, v0, 7
+; CHECK-NEXT: v_readlane_b32 s49, v0, 6
+; CHECK-NEXT: v_readlane_b32 s48, v0, 5
+; CHECK-NEXT: v_readlane_b32 s47, v0, 4
+; CHECK-NEXT: v_readlane_b32 s46, v0, 3
; CHECK-NEXT: v_readlane_b32 s37, v0, 2
; CHECK-NEXT: v_readlane_b32 s36, v0, 1
; CHECK-NEXT: v_readlane_b32 s35, v0, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -172,152 +106,86 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: v_writelane_b32 v1, s35, 0
; CHECK-NEXT: v_writelane_b32 v1, s36, 1
; CHECK-NEXT: v_writelane_b32 v1, s37, 2
-; CHECK-NEXT: v_writelane_b32 v1, s38, 3
-; CHECK-NEXT: v_writelane_b32 v1, s39, 4
-; CHECK-NEXT: v_writelane_b32 v1, s40, 5
-; CHECK-NEXT: v_writelane_b32 v1, s41, 6
-; CHECK-NEXT: v_writelane_b32 v1, s42, 7
-; CHECK-NEXT: v_writelane_b32 v1, s43, 8
-; CHECK-NEXT: v_writelane_b32 v1, s44, 9
-; CHECK-NEXT: v_writelane_b32 v1, s45, 10
-; CHECK-NEXT: v_writelane_b32 v1, s46, 11
-; CHECK-NEXT: v_writelane_b32 v1, s47, 12
-; CHECK-NEXT: v_writelane_b32 v1, s48, 13
-; CHECK-NEXT: v_writelane_b32 v1, s49, 14
-; CHECK-NEXT: v_writelane_b32 v1, s50, 15
-; CHECK-NEXT: v_writelane_b32 v1, s51, 16
-; CHECK-NEXT: v_writelane_b32 v1, s52, 17
-; CHECK-NEXT: v_writelane_b32 v1, s53, 18
-; CHECK-NEXT: v_writelane_b32 v1, s54, 19
-; CHECK-NEXT: v_writelane_b32 v1, s55, 20
-; CHECK-NEXT: v_writelane_b32 v1, s56, 21
-; CHECK-NEXT: v_writelane_b32 v1, s57, 22
-; CHECK-NEXT: v_writelane_b32 v1, s58, 23
-; CHECK-NEXT: v_writelane_b32 v1, s59, 24
-; CHECK-NEXT: v_writelane_b32 v1, s60, 25
-; CHECK-NEXT: v_writelane_b32 v1, s61, 26
-; CHECK-NEXT: v_writelane_b32 v1, s62, 27
-; CHECK-NEXT: v_writelane_b32 v1, s63, 28
-; CHECK-NEXT: v_writelane_b32 v1, s64, 29
-; CHECK-NEXT: v_writelane_b32 v1, s65, 30
-; CHECK-NEXT: v_writelane_b32 v1, s66, 31
-; CHECK-NEXT: v_writelane_b32 v1, s67, 32
-; CHECK-NEXT: v_writelane_b32 v1, s68, 33
-; CHECK-NEXT: v_writelane_b32 v1, s69, 34
-; CHECK-NEXT: v_writelane_b32 v1, s70, 35
-; CHECK-NEXT: v_writelane_b32 v1, s71, 36
-; CHECK-NEXT: v_writelane_b32 v1, s72, 37
-; CHECK-NEXT: v_writelane_b32 v1, s73, 38
-; CHECK-NEXT: v_writelane_b32 v1, s74, 39
-; CHECK-NEXT: v_writelane_b32 v1, s75, 40
-; CHECK-NEXT: v_writelane_b32 v1, s76, 41
-; CHECK-NEXT: v_writelane_b32 v1, s77, 42
-; CHECK-NEXT: v_writelane_b32 v1, s78, 43
-; CHECK-NEXT: v_writelane_b32 v1, s79, 44
-; CHECK-NEXT: v_writelane_b32 v1, s80, 45
-; CHECK-NEXT: v_writelane_b32 v1, s81, 46
-; CHECK-NEXT: v_writelane_b32 v1, s82, 47
-; CHECK-NEXT: v_writelane_b32 v1, s83, 48
-; CHECK-NEXT: v_writelane_b32 v1, s84, 49
-; CHECK-NEXT: v_writelane_b32 v1, s85, 50
-; CHECK-NEXT: v_writelane_b32 v1, s86, 51
-; CHECK-NEXT: v_writelane_b32 v1, s87, 52
-; CHECK-NEXT: v_writelane_b32 v1, s88, 53
-; CHECK-NEXT: v_writelane_b32 v1, s89, 54
-; CHECK-NEXT: v_writelane_b32 v1, s90, 55
-; CHECK-NEXT: v_writelane_b32 v1, s91, 56
-; CHECK-NEXT: v_writelane_b32 v1, s92, 57
-; CHECK-NEXT: v_writelane_b32 v1, s93, 58
-; CHECK-NEXT: v_writelane_b32 v1, s94, 59
-; CHECK-NEXT: v_writelane_b32 v1, s95, 60
-; CHECK-NEXT: v_writelane_b32 v2, s99, 0
-; CHECK-NEXT: v_writelane_b32 v1, s96, 61
-; CHECK-NEXT: v_writelane_b32 v2, s100, 1
-; CHECK-NEXT: v_writelane_b32 v1, s97, 62
-; CHECK-NEXT: v_writelane_b32 v2, s101, 2
+; CHECK-NEXT: v_writelane_b32 v1, s46, 3
+; CHECK-NEXT: v_writelane_b32 v1, s47, 4
+; CHECK-NEXT: v_writelane_b32 v1, s48, 5
+; CHECK-NEXT: v_writelane_b32 v1, s49, 6
+; CHECK-NEXT: v_writelane_b32 v1, s50, 7
+; CHECK-NEXT: v_writelane_b32 v1, s51, 8
+; CHECK-NEXT: v_writelane_b32 v1, s52, 9
+; CHECK-NEXT: v_writelane_b32 v1, s53, 10
+; CHECK-NEXT: v_writelane_b32 v1, s62, 11
+; CHECK-NEXT: v_writelane_b32 v1, s63, 12
+; CHECK-NEXT: v_writelane_b32 v1, s64, 13
+; CHECK-NEXT: v_writelane_b32 v1, s65, 14
+; CHECK-NEXT: v_writelane_b32 v1, s66, 15
+; CHECK-NEXT: v_writelane_b32 v1, s67, 16
+; CHECK-NEXT: v_writelane_b32 v1, s68, 17
+; CHECK-NEXT: v_writelane_b32 v1, s69, 18
+; CHECK-NEXT: v_writelane_b32 v1, s78, 19
+; CHECK-NEXT: v_writelane_b32 v1, s79, 20
+; CHECK-NEXT: v_writelane_b32 v1, s80, 21
+; CHECK-NEXT: v_writelane_b32 v1, s81, 22
+; CHECK-NEXT: v_writelane_b32 v1, s82, 23
+; CHECK-NEXT: v_writelane_b32 v1, s83, 24
+; CHECK-NEXT: v_writelane_b32 v1, s84, 25
+; CHECK-NEXT: v_writelane_b32 v1, s85, 26
+; CHECK-NEXT: v_writelane_b32 v1, s94, 27
+; CHECK-NEXT: v_writelane_b32 v1, s95, 28
+; CHECK-NEXT: v_writelane_b32 v1, s96, 29
+; CHECK-NEXT: v_writelane_b32 v1, s97, 30
+; CHECK-NEXT: v_writelane_b32 v1, s98, 31
+; CHECK-NEXT: v_writelane_b32 v1, s99, 32
+; CHECK-NEXT: v_writelane_b32 v1, s100, 33
+; CHECK-NEXT: v_writelane_b32 v1, s101, 34
; CHECK-NEXT: v_mov_b32_e32 v0, 0
-; CHECK-NEXT: v_writelane_b32 v1, s98, 63
-; CHECK-NEXT: v_writelane_b32 v2, s102, 3
+; CHECK-NEXT: v_writelane_b32 v1, s102, 35
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s102, v2, 3
-; CHECK-NEXT: v_readlane_b32 s101, v2, 2
-; CHECK-NEXT: v_readlane_b32 s100, v2, 1
-; CHECK-NEXT: v_readlane_b32 s99, v2, 0
-; CHECK-NEXT: v_readlane_b32 s98, v1, 63
-; CHECK-NEXT: v_readlane_b32 s97, v1, 62
-; CHECK-NEXT: v_readlane_b32 s96, v1, 61
-; CHECK-NEXT: v_readlane_b32 s95, v1, 60
-; CHECK-NEXT: v_readlane_b32 s94, v1, 59
-; CHECK-NEXT: v_readlane_b32 s93, v1, 58
-; CHECK-NEXT: v_readlane_b32 s92, v1, 57
-; CHECK-NEXT: v_readlane_b32 s91, v1, 56
-; CHECK-NEXT: v_readlane_b32 s90, v1, 55
-; CHECK-NEXT: v_readlane_b32 s89, v1, 54
-; CHECK-NEXT: v_readlane_b32 s88, v1, 53
-; CHECK-NEXT: v_readlane_b32 s87, v1, 52
-; CHECK-NEXT: v_readlane_b32 s86, v1, 51
-; CHECK-NEXT: v_readlane_b32 s85, v1, 50
-; CHECK-NEXT: v_readlane_b32 s84, v1, 49
-; CHECK-NEXT: v_readlane_b32 s83, v1, 48
-; CHECK-NEXT: v_readlane_b32 s82, v1, 47
-; CHECK-NEXT: v_readlane_b32 s81, v1, 46
-; CHECK-NEXT: v_readlane_b32 s80, v1, 45
-; CHECK-NEXT: v_readlane_b32 s79, v1, 44
-; CHECK-NEXT: v_readlane_b32 s78, v1, 43
-; CHECK-NEXT: v_readlane_b32 s77, v1, 42
-; CHECK-NEXT: v_readlane_b32 s76, v1, 41
-; CHECK-NEXT: v_readlane_b32 s75, v1, 40
-; CHECK-NEXT: v_readlane_b32 s74, v1, 39
-; CHECK-NEXT: v_readlane_b32 s73, v1, 38
-; CHECK-NEXT: v_readlane_b32 s72, v1, 37
-; CHECK-NEXT: v_readlane_b32 s71, v1, 36
-; CHECK-NEXT: v_readlane_b32 s70, v1, 35
-; CHECK-NEXT: v_readlane_b32 s69, v1, 34
-; CHECK-NEXT: v_readlane_b32 s68, v1, 33
-; CHECK-NEXT: v_readlane_b32 s67, v1, 32
-; CHECK-NEXT: v_readlane_b32 s66, v1, 31
-; CHECK-NEXT: v_readlane_b32 s65, v1, 30
-; CHECK-NEXT: v_readlane_b32 s64, v1, 29
-; CHECK-NEXT: v_readlane_b32 s63, v1, 28
-; CHECK-NEXT: v_readlane_b32 s62, v1, 27
-; CHECK-NEXT: v_readlane_b32 s61, v1, 26
-; CHECK-NEXT: v_readlane_b32 s60, v1, 25
-; CHECK-NEXT: v_readlane_b32 s59, v1, 24
-; CHECK-NEXT: v_readlane_b32 s58, v1, 23
-; CHECK-NEXT: v_readlane_b32 s57, v1, 22
-; CHECK-NEXT: v_readlane_b32 s56, v1, 21
-; CHECK-NEXT: v_readlane_b32 s55, v1, 20
-; CHECK-NEXT: v_readlane_b32 s54, v1, 19
-; CHECK-NEXT: v_readlane_b32 s53, v1, 18
-; CHECK-NEXT: v_readlane_b32 s52, v1, 17
-; CHECK-NEXT: v_readlane_b32 s51, v1, 16
-; CHECK-NEXT: v_readlane_b32 s50, v1, 15
-; CHECK-NEXT: v_readlane_b32 s49, v1, 14
-; CHECK-NEXT: v_readlane_b32 s48, v1, 13
-; CHECK-NEXT: v_readlane_b32 s47, v1, 12
-; CHECK-NEXT: v_readlane_b32 s46, v1, 11
-; CHECK-NEXT: v_readlane_b32 s45, v1, 10
-; CHECK-NEXT: v_readlane_b32 s44, v1, 9
-; CHECK-NEXT: v_readlane_b32 s43, v1, 8
-; CHECK-NEXT: v_readlane_b32 s42, v1, 7
-; CHECK-NEXT: v_readlane_b32 s41, v1, 6
-; CHECK-NEXT: v_readlane_b32 s40, v1, 5
-; CHECK-NEXT: v_readlane_b32 s39, v1, 4
-; CHECK-NEXT: v_readlane_b32 s38, v1, 3
+; CHECK-NEXT: v_readlane_b32 s102, v1, 35
+; CHECK-NEXT: v_readlane_b32 s101, v1, 34
+; CHECK-NEXT: v_readlane_b32 s100, v1, 33
+; CHECK-NEXT: v_readlane_b32 s99, v1, 32
+; CHECK-NEXT: v_readlane_b32 s98, v1, 31
+; CHECK-NEXT: v_readlane_b32 s97, v1, 30
+; CHECK-NEXT: v_readlane_b32 s96, v1, 29
+; CHECK-NEXT: v_readlane_b32 s95, v1, 28
+; CHECK-NEXT: v_readlane_b32 s94, v1, 27
+; CHECK-NEXT: v_readlane_b32 s85, v1, 26
+; CHECK-NEXT: v_readlane_b32 s84, v1, 25
+; CHECK-NEXT: v_readlane_b32 s83, v1, 24
+; CHECK-NEXT: v_readlane_b32 s82, v1, 23
+; CHECK-NEXT: v_readlane_b32 s81, v1, 22
+; CHECK-NEXT: v_readlane_b32 s80, v1, 21
+; CHECK-NEXT: v_readlane_b32 s79, v1, 20
+; CHECK-NEXT: v_readlane_b32 s78, v1, 19
+; CHECK-NEXT: v_readlane_b32 s69, v1, 18
+; CHECK-NEXT: v_readlane_b32 s68, v1, 17
+; CHECK-NEXT: v_readlane_b32 s67, v1, 16
+; CHECK-NEXT: v_readlane_b32 s66, v1, 15
+; CHECK-NEXT: v_readlane_b32 s65, v1, 14
+; CHECK-NEXT: v_readlane_b32 s64, v1, 13
+; CHECK-NEXT: v_readlane_b32 s63, v1, 12
+; CHECK-NEXT: v_readlane_b32 s62, v1, 11
+; CHECK-NEXT: v_readlane_b32 s53, v1, 10
+; CHECK-NEXT: v_readlane_b32 s52, v1, 9
+; CHECK-NEXT: v_readlane_b32 s51, v1, 8
+; CHECK-NEXT: v_readlane_b32 s50, v1, 7
+; CHECK-NEXT: v_readlane_b32 s49, v1, 6
+; CHECK-NEXT: v_readlane_b32 s48, v1, 5
+; CHECK-NEXT: v_readlane_b32 s47, v1, 4
+; CHECK-NEXT: v_readlane_b32 s46, v1, 3
; CHECK-NEXT: v_readlane_b32 s37, v1, 2
; CHECK-NEXT: v_readlane_b32 s36, v1, 1
; CHECK-NEXT: v_readlane_b32 s35, v1, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
index 8f53ec2f992da..359152e9d2b45 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -17,70 +17,78 @@ body: |
; RA-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; RA-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_1024 = S_MOV_B32 -1
; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_1024 = S_MOV_B32 -1
+ ; RA-NEXT: SI_SPILL_S1024_SAVE [[S_MOV_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
; RA-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_1024 = S_MOV_B32 0
+ ; RA-NEXT: SI_SPILL_S1024_SAVE [[S_MOV_B32_1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
; RA-NEXT: {{ $}}
; RA-NEXT: bb.1:
; RA-NEXT: successors: %bb.2(0x80000000)
; RA-NEXT: {{ $}}
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub4:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub5:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub6:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub7:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub8:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub9:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub10:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub11:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub12:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub13:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub14:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub15:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub16:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub17:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub18:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub19:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub20:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub21:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub22:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub23:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub24:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub25:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub26:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub27:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub28:sgpr_1024 = COPY [[S_MOV_B32_]].sub0
- ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub29:sgpr_1024 = COPY [[S_MOV_B32_]].sub1
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub1:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub2:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub3:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub4:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub5:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub6:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub7:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub8:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub9:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub10:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub11:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub12:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub13:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub14:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub15:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub16:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub17:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub18:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub19:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub20:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub21:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub22:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub23:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub24:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub25:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub26:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub27:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub28:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub29:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub30:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
- ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub31:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0
+ ; RA-NEXT: [[SI_SPILL_S1024_RESTORE:%[0-9]+]]:sgpr_1024 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ ; RA-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:sgpr_1024 = COPY [[SI_SPILL_S1024_RESTORE]].sub0_sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub2:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub3:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub4:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub5:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub6:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub7:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub8:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub9:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub10:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub11:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub12:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub13:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub14:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub15:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub16:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub17:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub18:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub19:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub20:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub21:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub22:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub23:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub24:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub25:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub26:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub27:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub28:sgpr_1024 = COPY [[COPY]].sub0
+ ; RA-NEXT: [[COPY:%[0-9]+]].sub29:sgpr_1024 = COPY [[COPY]].sub1
+ ; RA-NEXT: SI_SPILL_S1024_SAVE [[COPY]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+ ; RA-NEXT: [[SI_SPILL_S1024_RESTORE1:%[0-9]+]]:sgpr_1024 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; RA-NEXT: undef [[COPY1:%[0-9]+]].sub0:sgpr_1024 = COPY [[SI_SPILL_S1024_RESTORE1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub1:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub2:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub3:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub4:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub5:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub6:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub7:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub8:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub9:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub10:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub11:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub12:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub13:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub14:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub15:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub16:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub17:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub18:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub19:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub20:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub21:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub22:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub23:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub24:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub25:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub26:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub27:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub28:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub29:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub30:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: [[COPY1:%[0-9]+]].sub31:sgpr_1024 = COPY [[COPY1]].sub0
+ ; RA-NEXT: SI_SPILL_S1024_SAVE [[COPY1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
; RA-NEXT: {{ $}}
; RA-NEXT: bb.2:
; RA-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
@@ -95,14 +103,17 @@ body: |
; VR-NEXT: {{ $}}
; VR-NEXT: renamable $sgpr37 = S_MOV_B32 -1
; VR-NEXT: renamable $sgpr36 = S_MOV_B32 -1
- ; VR-NEXT: renamable $sgpr68 = S_MOV_B32 0
+ ; VR-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+ ; VR-NEXT: renamable $sgpr36 = S_MOV_B32 0
; VR-NEXT: renamable $sgpr30_sgpr31 = IMPLICIT_DEF
; VR-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
+ ; VR-NEXT: SI_SPILL_S1024_SAVE killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
; VR-NEXT: {{ $}}
; VR-NEXT: bb.1:
; VR-NEXT: successors: %bb.2(0x80000000)
- ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003
+ ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35
; VR-NEXT: {{ $}}
+ ; VR-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
; VR-NEXT: renamable $sgpr38 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr39 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr40 = COPY renamable $sgpr36
@@ -131,41 +142,44 @@ body: |
; VR-NEXT: renamable $sgpr63 = COPY renamable $sgpr37
; VR-NEXT: renamable $sgpr64 = COPY renamable $sgpr36
; VR-NEXT: renamable $sgpr65 = COPY renamable $sgpr37
- ; VR-NEXT: renamable $sgpr69 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr70 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr71 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr72 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr73 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr74 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr75 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr76 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr77 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr78 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr79 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr80 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr81 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr82 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr83 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr84 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr85 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr86 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr87 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr88 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr89 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr90 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr91 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr92 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr93 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr94 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr95 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr96 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr97 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr98 = COPY renamable $sgpr68
- ; VR-NEXT: renamable $sgpr99 = COPY renamable $sgpr68
+ ; VR-NEXT: SI_SPILL_S1024_SAVE killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+ ; VR-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; VR-NEXT: renamable $sgpr37 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr38 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr39 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr40 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr41 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr42 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr43 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr44 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr45 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr46 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr47 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr48 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr49 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr50 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr51 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr52 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr53 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr54 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr55 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr56 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr57 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr58 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr59 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr60 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr61 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr62 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr63 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr64 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr65 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr66 = COPY renamable $sgpr36
+ ; VR-NEXT: renamable $sgpr67 = COPY renamable $sgpr36
+ ; VR-NEXT: SI_SPILL_S1024_SAVE killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
; VR-NEXT: {{ $}}
; VR-NEXT: bb.2:
; VR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003
+ ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35
; VR-NEXT: {{ $}}
; VR-NEXT: S_NOP 0, csr_amdgpu, implicit renamable $sgpr30_sgpr31, implicit renamable $sgpr34_sgpr35
; VR-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index d4d3b37a0ed1e..89bb346ee98df 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -11,12 +11,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-LABEL: kernel_background_evaluate:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_load_dword s0, s[4:5], 0x24
-; MUBUF-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
-; MUBUF-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
-; MUBUF-NEXT: s_mov_b32 s38, -1
-; MUBUF-NEXT: s_mov_b32 s39, 0x31c16000
-; MUBUF-NEXT: s_add_u32 s36, s36, s11
-; MUBUF-NEXT: s_addc_u32 s37, s37, 0
+; MUBUF-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; MUBUF-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; MUBUF-NEXT: s_mov_b32 s50, -1
+; MUBUF-NEXT: s_mov_b32 s51, 0x31c16000
+; MUBUF-NEXT: s_add_u32 s48, s48, s11
+; MUBUF-NEXT: s_addc_u32 s49, s49, 0
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x2000
; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000
; MUBUF-NEXT: v_mov_b32_e32 v3, 0
@@ -27,8 +27,8 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: v_mov_b32_e32 v0, s0
-; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37]
-; MUBUF-NEXT: s_mov_b64 s[2:3], s[38:39]
+; MUBUF-NEXT: s_mov_b64 s[0:1], s[48:49]
+; MUBUF-NEXT: s_mov_b64 s[2:3], s[50:51]
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; MUBUF-NEXT: s_and_saveexec_b32 s0, vcc_lo
@@ -37,12 +37,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-NEXT: v_mov_b32_e32 v0, 0x4004
; MUBUF-NEXT: s_mov_b32 s0, 0x41c64e6d
; MUBUF-NEXT: s_clause 0x1
-; MUBUF-NEXT: buffer_load_dword v1, v0, s[36:39], 0 offen
-; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[48:51], 0 offen
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[48:51], 0 offen offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1
; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
-; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen
+; MUBUF-NEXT: buffer_store_dword v0, v0, s[48:51], 0 offen
; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit
; MUBUF-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index 0e568e3071e99..3a078a64aa28e 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function no_free_scratch_sgpr_for_bp_copy --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Check that we properly realign the stack. While 4-byte access is all
@@ -415,28 +416,21 @@ define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 {
; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 vcc_lo, s33
-; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0
-; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
-; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v1, s34, 0
+; GCN-NEXT: s_mov_b32 s39, s34
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4
+; GCN-NEXT: s_mov_b32 s38, s33
+; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0
+; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
; GCN-NEXT: s_addk_i32 s32, 0x6000
; GCN-NEXT: s_mov_b32 s32, s34
-; GCN-NEXT: v_readlane_b32 s34, v1, 0
+; GCN-NEXT: s_mov_b32 s34, s39
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:128
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: s_mov_b32 s33, vcc_lo
-; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_mov_b32 s33, s38
; GCN-NEXT: s_setpc_b64 s[30:31]
%local_val = alloca i32, align 128, addrspace(5)
store volatile i32 %b, ptr addrspace(5) %local_val, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index dd78c2f46dde8..f7300c921a745 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -32,15 +32,15 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-LABEL: kernel:
; GLOBALNESS1: ; %bb.0: ; %bb
; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GLOBALNESS1-NEXT: s_load_dwordx4 s[76:79], s[8:9], 0x0
+; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0
; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14
; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[76:77]
-; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[96:97]
+; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5]
; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20
; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17
@@ -49,7 +49,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400
-; GLOBALNESS1-NEXT: s_bitcmp1_b32 s78, 0
+; GLOBALNESS1-NEXT: s_bitcmp1_b32 s98, 0
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1]
; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0
@@ -59,24 +59,27 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GLOBALNESS1-NEXT: s_bitcmp1_b32 s6, 0
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v0
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GLOBALNESS1-NEXT: s_bitcmp1_b32 s7, 0
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v0
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[64:65], 1, v0
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0
+; GLOBALNESS1-NEXT: s_mov_b64 s[46:47], s[8:9]
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
+; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3
-; GLOBALNESS1-NEXT: s_mov_b32 s70, s16
-; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9]
-; GLOBALNESS1-NEXT: s_mov_b32 s71, s15
-; GLOBALNESS1-NEXT: s_mov_b32 s72, s14
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s8, 0
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s9, 1
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3
+; GLOBALNESS1-NEXT: s_mov_b32 s82, s16
+; GLOBALNESS1-NEXT: s_mov_b32 s83, s15
+; GLOBALNESS1-NEXT: s_mov_b32 s84, s14
; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11]
; GLOBALNESS1-NEXT: s_mov_b32 s32, 0
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr44_vgpr45
@@ -86,17 +89,27 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 1, v2
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 2
; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 3
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v3
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v0
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v1
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v2
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 4
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 5
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v2
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 6
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 7
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s62, 8
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s63, 9
; GLOBALNESS1-NEXT: s_branch .LBB1_4
; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[60:61]
+; GLOBALNESS1-NEXT: v_readlane_b32 s6, v56, 6
+; GLOBALNESS1-NEXT: v_readlane_b32 s7, v56, 7
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29
; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -114,34 +127,34 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0
; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1]
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40
; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1]
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
; GLOBALNESS1-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT: s_mov_b32 s12, s72
-; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
-; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
+; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
+; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
+; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[46:47]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[62:63]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9
; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_cmp_lt_i32 s79, 1
+; GLOBALNESS1-NEXT: s_cmp_lt_i32 s99, 1
; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7
; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock12
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 1
+; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 1
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_8
@@ -151,7 +164,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: .LBB1_8: ; %LeafBlock
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 0
+; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25
@@ -163,15 +176,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3]
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0
+; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000
-; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[74:75], s[62:63]
+; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[94:95]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55]
+; GLOBALNESS1-NEXT: v_readlane_b32 s4, v56, 2
+; GLOBALNESS1-NEXT: v_readlane_b32 s5, v56, 3
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_13
; GLOBALNESS1-NEXT: ; %bb.12: ; %bb39.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -185,70 +200,72 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[64:65], 0, v2
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
+; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[96:97], 0, v2
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[98:99], 1, v0
; GLOBALNESS1-NEXT: s_branch .LBB1_16
; GLOBALNESS1-NEXT: .LBB1_14: ; %Flow16
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5]
; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[52:53]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[68:69]
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25
; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i
; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1
; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[48:49]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[64:65]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS1-NEXT: ; %bb.17: ; %bb46.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[50:51]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[66:67]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS1-NEXT: ; %bb.18: ; %bb50.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[42:43]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[50:51]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21
; GLOBALNESS1-NEXT: ; %bb.19: ; %bb3.i.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[44:45]
+; GLOBALNESS1-NEXT: v_readlane_b32 s4, v56, 0
+; GLOBALNESS1-NEXT: v_readlane_b32 s5, v56, 1
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21
; GLOBALNESS1-NEXT: ; %bb.20: ; %bb6.i.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[66:67]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[98:99]
; GLOBALNESS1-NEXT: .LBB1_21: ; %spam.exit.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[56:57]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[80:81]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_add_u32 s68, s38, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s69, s39, 0
+; GLOBALNESS1-NEXT: s_add_u32 s78, s46, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s79, s47, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
-; GLOBALNESS1-NEXT: s_load_dwordx2 s[76:77], s[4:5], 0x0
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS1-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[68:69]
+; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT: s_mov_b32 s12, s72
-; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
-; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
+; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
+; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
+; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77]
+; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[68:69]
+; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT: s_mov_b32 s12, s72
-; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
-; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
+; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
+; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
+; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off
-; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77]
-; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[64:65]
+; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14
; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
@@ -261,15 +278,20 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_branch .LBB1_3
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
+; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[46:47], 0x0
+; GLOBALNESS1-NEXT: v_readlane_b32 s62, v56, 8
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
+; GLOBALNESS1-NEXT: v_readlane_b32 s63, v56, 9
; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[74:75]
-; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[62:63]
+; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53]
+; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[94:95]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[58:59]
+; GLOBALNESS1-NEXT: v_readlane_b32 s6, v56, 4
+; GLOBALNESS1-NEXT: v_readlane_b32 s7, v56, 5
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1
; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -288,17 +310,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32
; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT: s_mov_b32 s12, s72
-; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
-; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
+; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
+; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
+; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
@@ -306,17 +328,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34
; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS1-NEXT: s_mov_b32 s12, s72
-; GLOBALNESS1-NEXT: s_mov_b32 s13, s71
-; GLOBALNESS1-NEXT: s_mov_b32 s14, s70
+; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
+; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
+; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
@@ -324,15 +346,15 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-LABEL: kernel:
; GLOBALNESS0: ; %bb.0: ; %bb
; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GLOBALNESS0-NEXT: s_load_dwordx4 s[72:75], s[8:9], 0x0
+; GLOBALNESS0-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0
; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14
; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[72:73]
-; GLOBALNESS0-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[96:97]
+; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5]
; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20
; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17
@@ -341,7 +363,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400
-; GLOBALNESS0-NEXT: s_bitcmp1_b32 s74, 0
+; GLOBALNESS0-NEXT: s_bitcmp1_b32 s98, 0
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1]
; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0
@@ -351,24 +373,27 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GLOBALNESS0-NEXT: s_bitcmp1_b32 s6, 0
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v0
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; GLOBALNESS0-NEXT: s_bitcmp1_b32 s7, 0
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v0
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[64:65], 1, v0
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0
+; GLOBALNESS0-NEXT: s_mov_b64 s[46:47], s[8:9]
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
+; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3
-; GLOBALNESS0-NEXT: s_mov_b32 s68, s16
-; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9]
-; GLOBALNESS0-NEXT: s_mov_b32 s69, s15
-; GLOBALNESS0-NEXT: s_mov_b32 s70, s14
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s8, 0
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s9, 1
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3
+; GLOBALNESS0-NEXT: s_mov_b32 s78, s16
+; GLOBALNESS0-NEXT: s_mov_b32 s79, s15
+; GLOBALNESS0-NEXT: s_mov_b32 s82, s14
; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11]
; GLOBALNESS0-NEXT: s_mov_b32 s32, 0
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr44_vgpr45
@@ -378,17 +403,27 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 1, v2
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 2
; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 3
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v3
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v0
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v1
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v2
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 4
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 5
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v2
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 6
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 7
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s62, 8
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s63, 9
; GLOBALNESS0-NEXT: s_branch .LBB1_4
; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[60:61]
+; GLOBALNESS0-NEXT: v_readlane_b32 s6, v56, 6
+; GLOBALNESS0-NEXT: v_readlane_b32 s7, v56, 7
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29
; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -406,34 +441,34 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0
; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1]
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40
; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1]
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
; GLOBALNESS0-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT: s_mov_b32 s12, s70
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
+; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[46:47]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[62:63]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9
; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_cmp_lt_i32 s75, 1
+; GLOBALNESS0-NEXT: s_cmp_lt_i32 s99, 1
; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7
; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock12
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 1
+; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 1
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_8
@@ -443,7 +478,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: .LBB1_8: ; %LeafBlock
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 0
+; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25
@@ -455,15 +490,18 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3]
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0
+; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000
-; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[76:77], s[62:63]
+; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[94:95]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55]
+; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 2
+; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 3
+; GLOBALNESS0-NEXT: s_mov_b32 s83, s99
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13
; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -477,70 +515,72 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[64:65], 0, v2
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
+; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[96:97], 0, v2
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[98:99], 1, v0
; GLOBALNESS0-NEXT: s_branch .LBB1_16
; GLOBALNESS0-NEXT: .LBB1_14: ; %Flow16
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5]
; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[52:53]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[68:69]
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25
; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i
; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1
; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[48:49]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[64:65]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS0-NEXT: ; %bb.17: ; %bb46.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[50:51]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[66:67]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS0-NEXT: ; %bb.18: ; %bb50.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[42:43]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[50:51]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21
; GLOBALNESS0-NEXT: ; %bb.19: ; %bb3.i.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[44:45]
+; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 0
+; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 1
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21
; GLOBALNESS0-NEXT: ; %bb.20: ; %bb6.i.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[66:67]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[98:99]
; GLOBALNESS0-NEXT: .LBB1_21: ; %spam.exit.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[56:57]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[80:81]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_add_u32 s72, s38, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s73, s39, 0
+; GLOBALNESS0-NEXT: s_add_u32 s84, s46, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s85, s47, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
-; GLOBALNESS0-NEXT: s_load_dwordx2 s[78:79], s[4:5], 0x0
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS0-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[72:73]
+; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT: s_mov_b32 s12, s70
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
+; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79]
+; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[72:73]
+; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT: s_mov_b32 s12, s70
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
+; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off
-; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79]
-; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[64:65]
+; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14
; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
@@ -553,15 +593,20 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_branch .LBB1_3
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
+; GLOBALNESS0-NEXT: v_readlane_b32 s62, v56, 8
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
+; GLOBALNESS0-NEXT: s_mov_b32 s99, s83
+; GLOBALNESS0-NEXT: v_readlane_b32 s63, v56, 9
; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[76:77]
-; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[62:63]
+; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53]
+; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[94:95]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[58:59]
+; GLOBALNESS0-NEXT: v_readlane_b32 s6, v56, 4
+; GLOBALNESS0-NEXT: v_readlane_b32 s7, v56, 5
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1
; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -580,17 +625,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32
; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT: s_mov_b32 s12, s70
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
+; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
@@ -598,17 +643,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34
; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GLOBALNESS0-NEXT: s_mov_b32 s12, s70
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s69
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s68
+; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
index b80c478c3761f..edb1f74d738f5 100644
--- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
@@ -27,11 +27,11 @@ body: |
; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr0
; CHECK-NEXT: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr5 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1056964608
- ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
; CHECK-NEXT: renamable $sgpr8 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr5
@@ -43,16 +43,16 @@ body: |
; CHECK-NEXT: renamable $sgpr15 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1
; CHECK-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr76_sgpr77_sgpr78_sgpr79 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1200
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1264
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1328
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6)
@@ -68,10 +68,10 @@ body: |
; CHECK-NEXT: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
- ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index d9df80ce6c1c0..9afa0e2bb2dcd 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -20,16 +20,16 @@ define hidden void @widget() {
; GCN-NEXT: v_writelane_b32 v41, s35, 3
; GCN-NEXT: v_writelane_b32 v41, s36, 4
; GCN-NEXT: v_writelane_b32 v41, s37, 5
-; GCN-NEXT: v_writelane_b32 v41, s38, 6
-; GCN-NEXT: v_writelane_b32 v41, s39, 7
-; GCN-NEXT: v_writelane_b32 v41, s40, 8
-; GCN-NEXT: v_writelane_b32 v41, s41, 9
-; GCN-NEXT: v_writelane_b32 v41, s42, 10
-; GCN-NEXT: v_writelane_b32 v41, s43, 11
-; GCN-NEXT: v_writelane_b32 v41, s44, 12
-; GCN-NEXT: v_writelane_b32 v41, s45, 13
-; GCN-NEXT: v_writelane_b32 v41, s46, 14
-; GCN-NEXT: v_writelane_b32 v41, s47, 15
+; GCN-NEXT: v_writelane_b32 v41, s46, 6
+; GCN-NEXT: v_writelane_b32 v41, s47, 7
+; GCN-NEXT: v_writelane_b32 v41, s48, 8
+; GCN-NEXT: v_writelane_b32 v41, s49, 9
+; GCN-NEXT: v_writelane_b32 v41, s50, 10
+; GCN-NEXT: v_writelane_b32 v41, s51, 11
+; GCN-NEXT: v_writelane_b32 v41, s52, 12
+; GCN-NEXT: v_writelane_b32 v41, s53, 13
+; GCN-NEXT: v_writelane_b32 v41, s62, 14
+; GCN-NEXT: v_writelane_b32 v41, s63, 15
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_dword v0, v[0:1]
@@ -37,7 +37,7 @@ define hidden void @widget() {
; GCN-NEXT: s_mov_b64 s[16:17], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
-; GCN-NEXT: s_mov_b64 s[46:47], 0
+; GCN-NEXT: s_mov_b64 s[62:63], 0
; GCN-NEXT: s_mov_b64 s[18:19], 0
; GCN-NEXT: s_cbranch_vccz .LBB0_9
; GCN-NEXT: ; %bb.1: ; %Flow
@@ -52,30 +52,30 @@ define hidden void @widget() {
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
; GCN-NEXT: s_mov_b64 s[34:35], s[4:5]
; GCN-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GCN-NEXT: s_mov_b64 s[38:39], s[8:9]
-; GCN-NEXT: s_mov_b64 s[40:41], s[10:11]
-; GCN-NEXT: s_mov_b32 s42, s12
-; GCN-NEXT: s_mov_b32 s43, s13
-; GCN-NEXT: s_mov_b32 s44, s14
-; GCN-NEXT: s_mov_b32 s45, s15
+; GCN-NEXT: s_mov_b64 s[46:47], s[8:9]
+; GCN-NEXT: s_mov_b64 s[48:49], s[10:11]
+; GCN-NEXT: s_mov_b32 s50, s12
+; GCN-NEXT: s_mov_b32 s51, s13
+; GCN-NEXT: s_mov_b32 s52, s14
+; GCN-NEXT: s_mov_b32 s53, s15
; GCN-NEXT: v_mov_b32_e32 v40, v31
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_mov_b32_e32 v31, v40
-; GCN-NEXT: s_mov_b32 s12, s42
-; GCN-NEXT: s_mov_b32 s13, s43
-; GCN-NEXT: s_mov_b32 s14, s44
-; GCN-NEXT: s_mov_b32 s15, s45
+; GCN-NEXT: s_mov_b32 s12, s50
+; GCN-NEXT: s_mov_b32 s13, s51
+; GCN-NEXT: s_mov_b32 s14, s52
+; GCN-NEXT: s_mov_b32 s15, s53
; GCN-NEXT: s_mov_b64 s[4:5], s[34:35]
; GCN-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GCN-NEXT: s_mov_b64 s[8:9], s[38:39]
-; GCN-NEXT: s_mov_b64 s[10:11], s[40:41]
+; GCN-NEXT: s_mov_b64 s[8:9], s[46:47]
+; GCN-NEXT: s_mov_b64 s[10:11], s[48:49]
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; GCN-NEXT: s_mov_b64 s[16:17], 0
-; GCN-NEXT: s_andn2_b64 s[18:19], s[46:47], exec
+; GCN-NEXT: s_andn2_b64 s[18:19], s[62:63], exec
; GCN-NEXT: s_and_b64 s[20:21], vcc, exec
-; GCN-NEXT: s_or_b64 s[46:47], s[18:19], s[20:21]
+; GCN-NEXT: s_or_b64 s[62:63], s[18:19], s[20:21]
; GCN-NEXT: .LBB0_4: ; %Flow2
-; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[46:47]
+; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[62:63]
; GCN-NEXT: s_xor_b64 s[18:19], exec, s[18:19]
; GCN-NEXT: s_cbranch_execz .LBB0_6
; GCN-NEXT: ; %bb.5: ; %bb12
@@ -93,16 +93,16 @@ define hidden void @widget() {
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock
-; GCN-NEXT: v_readlane_b32 s47, v41, 15
-; GCN-NEXT: v_readlane_b32 s46, v41, 14
-; GCN-NEXT: v_readlane_b32 s45, v41, 13
-; GCN-NEXT: v_readlane_b32 s44, v41, 12
-; GCN-NEXT: v_readlane_b32 s43, v41, 11
-; GCN-NEXT: v_readlane_b32 s42, v41, 10
-; GCN-NEXT: v_readlane_b32 s41, v41, 9
-; GCN-NEXT: v_readlane_b32 s40, v41, 8
-; GCN-NEXT: v_readlane_b32 s39, v41, 7
-; GCN-NEXT: v_readlane_b32 s38, v41, 6
+; GCN-NEXT: v_readlane_b32 s63, v41, 15
+; GCN-NEXT: v_readlane_b32 s62, v41, 14
+; GCN-NEXT: v_readlane_b32 s53, v41, 13
+; GCN-NEXT: v_readlane_b32 s52, v41, 12
+; GCN-NEXT: v_readlane_b32 s51, v41, 11
+; GCN-NEXT: v_readlane_b32 s50, v41, 10
+; GCN-NEXT: v_readlane_b32 s49, v41, 9
+; GCN-NEXT: v_readlane_b32 s48, v41, 8
+; GCN-NEXT: v_readlane_b32 s47, v41, 7
+; GCN-NEXT: v_readlane_b32 s46, v41, 6
; GCN-NEXT: v_readlane_b32 s37, v41, 5
; GCN-NEXT: v_readlane_b32 s36, v41, 4
; GCN-NEXT: v_readlane_b32 s35, v41, 3
@@ -119,7 +119,7 @@ define hidden void @widget() {
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .LBB0_9: ; %bb2
-; GCN-NEXT: v_cmp_eq_u32_e64 s[46:47], 21, v0
+; GCN-NEXT: v_cmp_eq_u32_e64 s[62:63], 21, v0
; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 21, v0
; GCN-NEXT: s_mov_b64 vcc, exec
; GCN-NEXT: s_cbranch_execnz .LBB0_2
@@ -272,53 +272,53 @@ define hidden void @blam() {
; GCN-NEXT: v_writelane_b32 v45, s35, 3
; GCN-NEXT: v_writelane_b32 v45, s36, 4
; GCN-NEXT: v_writelane_b32 v45, s37, 5
-; GCN-NEXT: v_writelane_b32 v45, s38, 6
-; GCN-NEXT: v_writelane_b32 v45, s39, 7
-; GCN-NEXT: v_writelane_b32 v45, s40, 8
-; GCN-NEXT: v_writelane_b32 v45, s41, 9
-; GCN-NEXT: v_writelane_b32 v45, s42, 10
-; GCN-NEXT: v_writelane_b32 v45, s43, 11
-; GCN-NEXT: v_writelane_b32 v45, s44, 12
-; GCN-NEXT: v_writelane_b32 v45, s45, 13
-; GCN-NEXT: v_writelane_b32 v45, s46, 14
-; GCN-NEXT: v_writelane_b32 v45, s47, 15
-; GCN-NEXT: v_writelane_b32 v45, s48, 16
-; GCN-NEXT: v_writelane_b32 v45, s49, 17
-; GCN-NEXT: v_writelane_b32 v45, s50, 18
-; GCN-NEXT: v_writelane_b32 v45, s51, 19
-; GCN-NEXT: v_writelane_b32 v45, s52, 20
-; GCN-NEXT: v_writelane_b32 v45, s53, 21
-; GCN-NEXT: v_writelane_b32 v45, s54, 22
-; GCN-NEXT: v_writelane_b32 v45, s55, 23
-; GCN-NEXT: v_writelane_b32 v45, s56, 24
-; GCN-NEXT: v_writelane_b32 v45, s57, 25
+; GCN-NEXT: v_writelane_b32 v45, s46, 6
+; GCN-NEXT: v_writelane_b32 v45, s47, 7
+; GCN-NEXT: v_writelane_b32 v45, s48, 8
+; GCN-NEXT: v_writelane_b32 v45, s49, 9
+; GCN-NEXT: v_writelane_b32 v45, s50, 10
+; GCN-NEXT: v_writelane_b32 v45, s51, 11
+; GCN-NEXT: v_writelane_b32 v45, s52, 12
+; GCN-NEXT: v_writelane_b32 v45, s53, 13
+; GCN-NEXT: v_writelane_b32 v45, s62, 14
+; GCN-NEXT: v_writelane_b32 v45, s63, 15
+; GCN-NEXT: v_writelane_b32 v45, s64, 16
+; GCN-NEXT: v_writelane_b32 v45, s65, 17
+; GCN-NEXT: v_writelane_b32 v45, s66, 18
+; GCN-NEXT: v_writelane_b32 v45, s67, 19
+; GCN-NEXT: v_writelane_b32 v45, s68, 20
+; GCN-NEXT: v_writelane_b32 v45, s69, 21
+; GCN-NEXT: v_writelane_b32 v45, s78, 22
+; GCN-NEXT: v_writelane_b32 v45, s79, 23
+; GCN-NEXT: v_writelane_b32 v45, s80, 24
+; GCN-NEXT: v_writelane_b32 v45, s81, 25
; GCN-NEXT: v_mov_b32_e32 v40, v31
-; GCN-NEXT: s_mov_b32 s46, s15
-; GCN-NEXT: s_mov_b32 s47, s14
-; GCN-NEXT: s_mov_b32 s48, s13
-; GCN-NEXT: s_mov_b32 s49, s12
+; GCN-NEXT: s_mov_b32 s62, s15
+; GCN-NEXT: s_mov_b32 s63, s14
+; GCN-NEXT: s_mov_b32 s64, s13
+; GCN-NEXT: s_mov_b32 s65, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v40
; GCN-NEXT: flat_load_dword v43, v[0:1]
; GCN-NEXT: v_mov_b32_e32 v42, 0
-; GCN-NEXT: s_mov_b64 s[50:51], 0
+; GCN-NEXT: s_mov_b64 s[66:67], 0
; GCN-NEXT: v_lshlrev_b32_e32 v41, 2, v2
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cmp_eq_f32_e64 s[52:53], 0, v43
-; GCN-NEXT: v_cmp_neq_f32_e64 s[42:43], 0, v43
+; GCN-NEXT: v_cmp_eq_f32_e64 s[68:69], 0, v43
+; GCN-NEXT: v_cmp_neq_f32_e64 s[50:51], 0, v43
; GCN-NEXT: v_mov_b32_e32 v44, 0x7fc00000
; GCN-NEXT: s_branch .LBB1_2
; GCN-NEXT: .LBB1_1: ; %Flow7
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT: s_or_b64 s[50:51], s[4:5], s[50:51]
-; GCN-NEXT: s_andn2_b64 exec, exec, s[50:51]
+; GCN-NEXT: s_or_b64 s[66:67], s[4:5], s[66:67]
+; GCN-NEXT: s_andn2_b64 exec, exec, s[66:67]
; GCN-NEXT: s_cbranch_execz .LBB1_18
; GCN-NEXT: .LBB1_2: ; %bb2
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -329,26 +329,26 @@ define hidden void @blam() {
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT: s_xor_b64 s[54:55], exec, s[8:9]
+; GCN-NEXT: s_xor_b64 s[78:79], exec, s[8:9]
; GCN-NEXT: s_cbranch_execz .LBB1_12
; GCN-NEXT: ; %bb.3: ; %bb6
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: v_cmp_eq_u32_e64 s[44:45], 3, v0
-; GCN-NEXT: s_and_saveexec_b64 s[56:57], s[44:45]
+; GCN-NEXT: v_cmp_eq_u32_e64 s[52:53], 3, v0
+; GCN-NEXT: s_and_saveexec_b64 s[80:81], s[52:53]
; GCN-NEXT: s_cbranch_execz .LBB1_11
; GCN-NEXT: ; %bb.4: ; %bb11
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, spam@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, spam@rel32@hi+12
-; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s49
-; GCN-NEXT: s_mov_b32 s13, s48
-; GCN-NEXT: s_mov_b32 s14, s47
-; GCN-NEXT: s_mov_b32 s15, s46
+; GCN-NEXT: s_mov_b32 s12, s65
+; GCN-NEXT: s_mov_b32 s13, s64
+; GCN-NEXT: s_mov_b32 s14, s63
+; GCN-NEXT: s_mov_b32 s15, s62
; GCN-NEXT: v_mov_b32_e32 v31, v40
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
@@ -357,13 +357,13 @@ define hidden void @blam() {
; GCN-NEXT: s_cbranch_execz .LBB1_10
; GCN-NEXT: ; %bb.5: ; %bb14
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: s_mov_b64 s[8:9], s[52:53]
-; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[42:43]
+; GCN-NEXT: s_mov_b64 s[8:9], s[68:69]
+; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[50:51]
; GCN-NEXT: s_cbranch_execz .LBB1_7
; GCN-NEXT: ; %bb.6: ; %bb16
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
-; GCN-NEXT: s_or_b64 s[8:9], s[52:53], exec
+; GCN-NEXT: s_or_b64 s[8:9], s[68:69], exec
; GCN-NEXT: .LBB1_7: ; %Flow3
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
@@ -382,19 +382,19 @@ define hidden void @blam() {
; GCN-NEXT: .LBB1_10: ; %Flow2
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: s_andn2_b64 s[4:5], s[44:45], exec
+; GCN-NEXT: s_andn2_b64 s[4:5], s[52:53], exec
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
-; GCN-NEXT: s_or_b64 s[44:45], s[4:5], s[8:9]
+; GCN-NEXT: s_or_b64 s[52:53], s[4:5], s[8:9]
; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec
; GCN-NEXT: .LBB1_11: ; %Flow1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: s_or_b64 exec, exec, s[56:57]
-; GCN-NEXT: s_orn2_b64 s[4:5], s[44:45], exec
+; GCN-NEXT: s_or_b64 exec, exec, s[80:81]
+; GCN-NEXT: s_orn2_b64 s[4:5], s[52:53], exec
; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: .LBB1_12: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[54:55]
+; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[78:79]
; GCN-NEXT: s_cbranch_execz .LBB1_16
; GCN-NEXT: ; %bb.13: ; %bb8
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -426,27 +426,27 @@ define hidden void @blam() {
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT: s_branch .LBB1_1
; GCN-NEXT: .LBB1_18: ; %DummyReturnBlock
-; GCN-NEXT: s_or_b64 exec, exec, s[50:51]
-; GCN-NEXT: v_readlane_b32 s57, v45, 25
-; GCN-NEXT: v_readlane_b32 s56, v45, 24
-; GCN-NEXT: v_readlane_b32 s55, v45, 23
-; GCN-NEXT: v_readlane_b32 s54, v45, 22
-; GCN-NEXT: v_readlane_b32 s53, v45, 21
-; GCN-NEXT: v_readlane_b32 s52, v45, 20
-; GCN-NEXT: v_readlane_b32 s51, v45, 19
-; GCN-NEXT: v_readlane_b32 s50, v45, 18
-; GCN-NEXT: v_readlane_b32 s49, v45, 17
-; GCN-NEXT: v_readlane_b32 s48, v45, 16
-; GCN-NEXT: v_readlane_b32 s47, v45, 15
-; GCN-NEXT: v_readlane_b32 s46, v45, 14
-; GCN-NEXT: v_readlane_b32 s45, v45, 13
-; GCN-NEXT: v_readlane_b32 s44, v45, 12
-; GCN-NEXT: v_readlane_b32 s43, v45, 11
-; GCN-NEXT: v_readlane_b32 s42, v45, 10
-; GCN-NEXT: v_readlane_b32 s41, v45, 9
-; GCN-NEXT: v_readlane_b32 s40, v45, 8
-; GCN-NEXT: v_readlane_b32 s39, v45, 7
-; GCN-NEXT: v_readlane_b32 s38, v45, 6
+; GCN-NEXT: s_or_b64 exec, exec, s[66:67]
+; GCN-NEXT: v_readlane_b32 s81, v45, 25
+; GCN-NEXT: v_readlane_b32 s80, v45, 24
+; GCN-NEXT: v_readlane_b32 s79, v45, 23
+; GCN-NEXT: v_readlane_b32 s78, v45, 22
+; GCN-NEXT: v_readlane_b32 s69, v45, 21
+; GCN-NEXT: v_readlane_b32 s68, v45, 20
+; GCN-NEXT: v_readlane_b32 s67, v45, 19
+; GCN-NEXT: v_readlane_b32 s66, v45, 18
+; GCN-NEXT: v_readlane_b32 s65, v45, 17
+; GCN-NEXT: v_readlane_b32 s64, v45, 16
+; GCN-NEXT: v_readlane_b32 s63, v45, 15
+; GCN-NEXT: v_readlane_b32 s62, v45, 14
+; GCN-NEXT: v_readlane_b32 s53, v45, 13
+; GCN-NEXT: v_readlane_b32 s52, v45, 12
+; GCN-NEXT: v_readlane_b32 s51, v45, 11
+; GCN-NEXT: v_readlane_b32 s50, v45, 10
+; GCN-NEXT: v_readlane_b32 s49, v45, 9
+; GCN-NEXT: v_readlane_b32 s48, v45, 8
+; GCN-NEXT: v_readlane_b32 s47, v45, 7
+; GCN-NEXT: v_readlane_b32 s46, v45, 6
; GCN-NEXT: v_readlane_b32 s37, v45, 5
; GCN-NEXT: v_readlane_b32 s36, v45, 4
; GCN-NEXT: v_readlane_b32 s35, v45, 3
diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
index 8a0bf26f81d22..670b7d7b8893b 100644
--- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
@@ -37,100 +37,74 @@ body: |
; MUBUF-LABEL: name: use_restore_frame_reg
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr1, $vgpr2
+ ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1
; MUBUF-NEXT: {{ $}}
- ; MUBUF-NEXT: $sgpr4 = COPY $sgpr33
+ ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc
- ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5)
- ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
- ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
+ ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34
; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
- ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 155648, killed $vgpr3, implicit $exec
- ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; MUBUF-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; MUBUF-NEXT: $vgpr2 = V_ADD_U32_e32 155648, killed $vgpr2, implicit $exec
+ ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; MUBUF-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr2
+ ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.2:
- ; MUBUF-NEXT: liveins: $vgpr2
+ ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; MUBUF-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; MUBUF-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
- ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc
- ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5)
- ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4
+ ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
+ ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
; MUBUF-NEXT: S_ENDPGM 0
;
; FLATSCR-LABEL: name: use_restore_frame_reg
; FLATSCR: bb.0:
; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; FLATSCR-NEXT: liveins: $vgpr1, $vgpr2
+ ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1
; FLATSCR-NEXT: {{ $}}
- ; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33
+ ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
- ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc
- ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5)
- ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
- ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
+ ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34
; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc
; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
- ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
- ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -8192, implicit-def $scc, implicit $scc
- ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
- ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
- ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
- ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -155648, implicit-def $scc, implicit $scc
- ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33
+ ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40
+ ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec
+ ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40
+ ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: bb.1:
; FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; FLATSCR-NEXT: liveins: $vgpr2
+ ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: S_NOP 0
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: bb.2:
- ; FLATSCR-NEXT: liveins: $vgpr2
+ ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
- ; FLATSCR-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
- ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc
- ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5)
- ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
- ; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4
+ ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
+ ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
; FLATSCR-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
index 5ced02f28c977..d0798b261abf0 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
@@ -33,36 +33,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX900-NEXT: v_writelane_b32 v63, s36, 0
; GFX900-NEXT: v_writelane_b32 v63, s37, 1
-; GFX900-NEXT: v_writelane_b32 v63, s38, 2
-; GFX900-NEXT: v_writelane_b32 v63, s39, 3
-; GFX900-NEXT: v_writelane_b32 v63, s40, 4
-; GFX900-NEXT: v_writelane_b32 v63, s41, 5
-; GFX900-NEXT: v_writelane_b32 v63, s42, 6
-; GFX900-NEXT: v_writelane_b32 v63, s43, 7
-; GFX900-NEXT: v_writelane_b32 v63, s44, 8
-; GFX900-NEXT: v_writelane_b32 v63, s45, 9
-; GFX900-NEXT: v_writelane_b32 v63, s46, 10
-; GFX900-NEXT: v_writelane_b32 v63, s47, 11
-; GFX900-NEXT: v_writelane_b32 v63, s48, 12
-; GFX900-NEXT: v_writelane_b32 v63, s49, 13
-; GFX900-NEXT: v_writelane_b32 v63, s50, 14
-; GFX900-NEXT: v_writelane_b32 v63, s51, 15
-; GFX900-NEXT: v_writelane_b32 v63, s52, 16
-; GFX900-NEXT: v_writelane_b32 v63, s53, 17
-; GFX900-NEXT: v_writelane_b32 v63, s54, 18
-; GFX900-NEXT: v_writelane_b32 v63, s55, 19
-; GFX900-NEXT: v_writelane_b32 v63, s56, 20
-; GFX900-NEXT: v_writelane_b32 v63, s57, 21
-; GFX900-NEXT: v_writelane_b32 v63, s58, 22
-; GFX900-NEXT: v_writelane_b32 v63, s59, 23
-; GFX900-NEXT: v_writelane_b32 v63, s60, 24
-; GFX900-NEXT: v_writelane_b32 v63, s61, 25
-; GFX900-NEXT: v_writelane_b32 v63, s62, 26
-; GFX900-NEXT: v_writelane_b32 v63, s63, 27
-; GFX900-NEXT: v_writelane_b32 v63, s64, 28
-; GFX900-NEXT: v_writelane_b32 v63, s65, 29
-; GFX900-NEXT: v_writelane_b32 v63, s66, 30
-; GFX900-NEXT: v_writelane_b32 v63, s67, 31
+; GFX900-NEXT: v_writelane_b32 v63, s46, 2
+; GFX900-NEXT: v_writelane_b32 v63, s47, 3
+; GFX900-NEXT: v_writelane_b32 v63, s48, 4
+; GFX900-NEXT: v_writelane_b32 v63, s49, 5
+; GFX900-NEXT: v_writelane_b32 v63, s50, 6
+; GFX900-NEXT: v_writelane_b32 v63, s51, 7
+; GFX900-NEXT: v_writelane_b32 v63, s52, 8
+; GFX900-NEXT: v_writelane_b32 v63, s53, 9
+; GFX900-NEXT: v_writelane_b32 v63, s62, 10
+; GFX900-NEXT: v_writelane_b32 v63, s63, 11
+; GFX900-NEXT: v_writelane_b32 v63, s64, 12
+; GFX900-NEXT: v_writelane_b32 v63, s65, 13
+; GFX900-NEXT: v_writelane_b32 v63, s66, 14
+; GFX900-NEXT: v_writelane_b32 v63, s67, 15
; GFX900-NEXT: v_mov_b32_e32 v33, v30
; GFX900-NEXT: v_mov_b32_e32 v34, v29
; GFX900-NEXT: v_mov_b32_e32 v35, v28
@@ -160,36 +144,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX900-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec
; GFX900-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: v_readlane_b32 s67, v63, 31
-; GFX900-NEXT: v_readlane_b32 s66, v63, 30
-; GFX900-NEXT: v_readlane_b32 s65, v63, 29
-; GFX900-NEXT: v_readlane_b32 s64, v63, 28
-; GFX900-NEXT: v_readlane_b32 s63, v63, 27
-; GFX900-NEXT: v_readlane_b32 s62, v63, 26
-; GFX900-NEXT: v_readlane_b32 s61, v63, 25
-; GFX900-NEXT: v_readlane_b32 s60, v63, 24
-; GFX900-NEXT: v_readlane_b32 s59, v63, 23
-; GFX900-NEXT: v_readlane_b32 s58, v63, 22
-; GFX900-NEXT: v_readlane_b32 s57, v63, 21
-; GFX900-NEXT: v_readlane_b32 s56, v63, 20
-; GFX900-NEXT: v_readlane_b32 s55, v63, 19
-; GFX900-NEXT: v_readlane_b32 s54, v63, 18
-; GFX900-NEXT: v_readlane_b32 s53, v63, 17
-; GFX900-NEXT: v_readlane_b32 s52, v63, 16
-; GFX900-NEXT: v_readlane_b32 s51, v63, 15
-; GFX900-NEXT: v_readlane_b32 s50, v63, 14
-; GFX900-NEXT: v_readlane_b32 s49, v63, 13
-; GFX900-NEXT: v_readlane_b32 s48, v63, 12
-; GFX900-NEXT: v_readlane_b32 s47, v63, 11
-; GFX900-NEXT: v_readlane_b32 s46, v63, 10
-; GFX900-NEXT: v_readlane_b32 s45, v63, 9
-; GFX900-NEXT: v_readlane_b32 s44, v63, 8
-; GFX900-NEXT: v_readlane_b32 s43, v63, 7
-; GFX900-NEXT: v_readlane_b32 s42, v63, 6
-; GFX900-NEXT: v_readlane_b32 s41, v63, 5
-; GFX900-NEXT: v_readlane_b32 s40, v63, 4
-; GFX900-NEXT: v_readlane_b32 s39, v63, 3
-; GFX900-NEXT: v_readlane_b32 s38, v63, 2
+; GFX900-NEXT: v_readlane_b32 s67, v63, 15
+; GFX900-NEXT: v_readlane_b32 s66, v63, 14
+; GFX900-NEXT: v_readlane_b32 s65, v63, 13
+; GFX900-NEXT: v_readlane_b32 s64, v63, 12
+; GFX900-NEXT: v_readlane_b32 s63, v63, 11
+; GFX900-NEXT: v_readlane_b32 s62, v63, 10
+; GFX900-NEXT: v_readlane_b32 s53, v63, 9
+; GFX900-NEXT: v_readlane_b32 s52, v63, 8
+; GFX900-NEXT: v_readlane_b32 s51, v63, 7
+; GFX900-NEXT: v_readlane_b32 s50, v63, 6
+; GFX900-NEXT: v_readlane_b32 s49, v63, 5
+; GFX900-NEXT: v_readlane_b32 s48, v63, 4
+; GFX900-NEXT: v_readlane_b32 s47, v63, 3
+; GFX900-NEXT: v_readlane_b32 s46, v63, 2
; GFX900-NEXT: v_readlane_b32 s37, v63, 1
; GFX900-NEXT: v_readlane_b32 s36, v63, 0
; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -236,36 +204,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX906-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX906-NEXT: v_writelane_b32 v63, s36, 0
; GFX906-NEXT: v_writelane_b32 v63, s37, 1
-; GFX906-NEXT: v_writelane_b32 v63, s38, 2
-; GFX906-NEXT: v_writelane_b32 v63, s39, 3
-; GFX906-NEXT: v_writelane_b32 v63, s40, 4
-; GFX906-NEXT: v_writelane_b32 v63, s41, 5
-; GFX906-NEXT: v_writelane_b32 v63, s42, 6
-; GFX906-NEXT: v_writelane_b32 v63, s43, 7
-; GFX906-NEXT: v_writelane_b32 v63, s44, 8
-; GFX906-NEXT: v_writelane_b32 v63, s45, 9
-; GFX906-NEXT: v_writelane_b32 v63, s46, 10
-; GFX906-NEXT: v_writelane_b32 v63, s47, 11
-; GFX906-NEXT: v_writelane_b32 v63, s48, 12
-; GFX906-NEXT: v_writelane_b32 v63, s49, 13
-; GFX906-NEXT: v_writelane_b32 v63, s50, 14
-; GFX906-NEXT: v_writelane_b32 v63, s51, 15
-; GFX906-NEXT: v_writelane_b32 v63, s52, 16
-; GFX906-NEXT: v_writelane_b32 v63, s53, 17
-; GFX906-NEXT: v_writelane_b32 v63, s54, 18
-; GFX906-NEXT: v_writelane_b32 v63, s55, 19
-; GFX906-NEXT: v_writelane_b32 v63, s56, 20
-; GFX906-NEXT: v_writelane_b32 v63, s57, 21
-; GFX906-NEXT: v_writelane_b32 v63, s58, 22
-; GFX906-NEXT: v_writelane_b32 v63, s59, 23
-; GFX906-NEXT: v_writelane_b32 v63, s60, 24
-; GFX906-NEXT: v_writelane_b32 v63, s61, 25
-; GFX906-NEXT: v_writelane_b32 v63, s62, 26
-; GFX906-NEXT: v_writelane_b32 v63, s63, 27
-; GFX906-NEXT: v_writelane_b32 v63, s64, 28
-; GFX906-NEXT: v_writelane_b32 v63, s65, 29
-; GFX906-NEXT: v_writelane_b32 v63, s66, 30
-; GFX906-NEXT: v_writelane_b32 v63, s67, 31
+; GFX906-NEXT: v_writelane_b32 v63, s46, 2
+; GFX906-NEXT: v_writelane_b32 v63, s47, 3
+; GFX906-NEXT: v_writelane_b32 v63, s48, 4
+; GFX906-NEXT: v_writelane_b32 v63, s49, 5
+; GFX906-NEXT: v_writelane_b32 v63, s50, 6
+; GFX906-NEXT: v_writelane_b32 v63, s51, 7
+; GFX906-NEXT: v_writelane_b32 v63, s52, 8
+; GFX906-NEXT: v_writelane_b32 v63, s53, 9
+; GFX906-NEXT: v_writelane_b32 v63, s62, 10
+; GFX906-NEXT: v_writelane_b32 v63, s63, 11
+; GFX906-NEXT: v_writelane_b32 v63, s64, 12
+; GFX906-NEXT: v_writelane_b32 v63, s65, 13
+; GFX906-NEXT: v_writelane_b32 v63, s66, 14
+; GFX906-NEXT: v_writelane_b32 v63, s67, 15
; GFX906-NEXT: v_mov_b32_e32 v33, v30
; GFX906-NEXT: v_mov_b32_e32 v34, v29
; GFX906-NEXT: v_mov_b32_e32 v35, v28
@@ -363,36 +315,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX906-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec
; GFX906-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; GFX906-NEXT: v_mov_b32_e32 v0, 0
-; GFX906-NEXT: v_readlane_b32 s67, v63, 31
-; GFX906-NEXT: v_readlane_b32 s66, v63, 30
-; GFX906-NEXT: v_readlane_b32 s65, v63, 29
-; GFX906-NEXT: v_readlane_b32 s64, v63, 28
-; GFX906-NEXT: v_readlane_b32 s63, v63, 27
-; GFX906-NEXT: v_readlane_b32 s62, v63, 26
-; GFX906-NEXT: v_readlane_b32 s61, v63, 25
-; GFX906-NEXT: v_readlane_b32 s60, v63, 24
-; GFX906-NEXT: v_readlane_b32 s59, v63, 23
-; GFX906-NEXT: v_readlane_b32 s58, v63, 22
-; GFX906-NEXT: v_readlane_b32 s57, v63, 21
-; GFX906-NEXT: v_readlane_b32 s56, v63, 20
-; GFX906-NEXT: v_readlane_b32 s55, v63, 19
-; GFX906-NEXT: v_readlane_b32 s54, v63, 18
-; GFX906-NEXT: v_readlane_b32 s53, v63, 17
-; GFX906-NEXT: v_readlane_b32 s52, v63, 16
-; GFX906-NEXT: v_readlane_b32 s51, v63, 15
-; GFX906-NEXT: v_readlane_b32 s50, v63, 14
-; GFX906-NEXT: v_readlane_b32 s49, v63, 13
-; GFX906-NEXT: v_readlane_b32 s48, v63, 12
-; GFX906-NEXT: v_readlane_b32 s47, v63, 11
-; GFX906-NEXT: v_readlane_b32 s46, v63, 10
-; GFX906-NEXT: v_readlane_b32 s45, v63, 9
-; GFX906-NEXT: v_readlane_b32 s44, v63, 8
-; GFX906-NEXT: v_readlane_b32 s43, v63, 7
-; GFX906-NEXT: v_readlane_b32 s42, v63, 6
-; GFX906-NEXT: v_readlane_b32 s41, v63, 5
-; GFX906-NEXT: v_readlane_b32 s40, v63, 4
-; GFX906-NEXT: v_readlane_b32 s39, v63, 3
-; GFX906-NEXT: v_readlane_b32 s38, v63, 2
+; GFX906-NEXT: v_readlane_b32 s67, v63, 15
+; GFX906-NEXT: v_readlane_b32 s66, v63, 14
+; GFX906-NEXT: v_readlane_b32 s65, v63, 13
+; GFX906-NEXT: v_readlane_b32 s64, v63, 12
+; GFX906-NEXT: v_readlane_b32 s63, v63, 11
+; GFX906-NEXT: v_readlane_b32 s62, v63, 10
+; GFX906-NEXT: v_readlane_b32 s53, v63, 9
+; GFX906-NEXT: v_readlane_b32 s52, v63, 8
+; GFX906-NEXT: v_readlane_b32 s51, v63, 7
+; GFX906-NEXT: v_readlane_b32 s50, v63, 6
+; GFX906-NEXT: v_readlane_b32 s49, v63, 5
+; GFX906-NEXT: v_readlane_b32 s48, v63, 4
+; GFX906-NEXT: v_readlane_b32 s47, v63, 3
+; GFX906-NEXT: v_readlane_b32 s46, v63, 2
; GFX906-NEXT: v_readlane_b32 s37, v63, 1
; GFX906-NEXT: v_readlane_b32 s36, v63, 0
; GFX906-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -438,36 +374,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX908-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
; GFX908-NEXT: v_writelane_b32 v62, s36, 0
; GFX908-NEXT: v_writelane_b32 v62, s37, 1
-; GFX908-NEXT: v_writelane_b32 v62, s38, 2
-; GFX908-NEXT: v_writelane_b32 v62, s39, 3
-; GFX908-NEXT: v_writelane_b32 v62, s40, 4
-; GFX908-NEXT: v_writelane_b32 v62, s41, 5
-; GFX908-NEXT: v_writelane_b32 v62, s42, 6
-; GFX908-NEXT: v_writelane_b32 v62, s43, 7
-; GFX908-NEXT: v_writelane_b32 v62, s44, 8
-; GFX908-NEXT: v_writelane_b32 v62, s45, 9
-; GFX908-NEXT: v_writelane_b32 v62, s46, 10
-; GFX908-NEXT: v_writelane_b32 v62, s47, 11
-; GFX908-NEXT: v_writelane_b32 v62, s48, 12
-; GFX908-NEXT: v_writelane_b32 v62, s49, 13
-; GFX908-NEXT: v_writelane_b32 v62, s50, 14
-; GFX908-NEXT: v_writelane_b32 v62, s51, 15
-; GFX908-NEXT: v_writelane_b32 v62, s52, 16
-; GFX908-NEXT: v_writelane_b32 v62, s53, 17
-; GFX908-NEXT: v_writelane_b32 v62, s54, 18
-; GFX908-NEXT: v_writelane_b32 v62, s55, 19
-; GFX908-NEXT: v_writelane_b32 v62, s56, 20
-; GFX908-NEXT: v_writelane_b32 v62, s57, 21
-; GFX908-NEXT: v_writelane_b32 v62, s58, 22
-; GFX908-NEXT: v_writelane_b32 v62, s59, 23
-; GFX908-NEXT: v_writelane_b32 v62, s60, 24
-; GFX908-NEXT: v_writelane_b32 v62, s61, 25
-; GFX908-NEXT: v_writelane_b32 v62, s62, 26
-; GFX908-NEXT: v_writelane_b32 v62, s63, 27
-; GFX908-NEXT: v_writelane_b32 v62, s64, 28
-; GFX908-NEXT: v_writelane_b32 v62, s65, 29
-; GFX908-NEXT: v_writelane_b32 v62, s66, 30
-; GFX908-NEXT: v_writelane_b32 v62, s67, 31
+; GFX908-NEXT: v_writelane_b32 v62, s46, 2
+; GFX908-NEXT: v_writelane_b32 v62, s47, 3
+; GFX908-NEXT: v_writelane_b32 v62, s48, 4
+; GFX908-NEXT: v_writelane_b32 v62, s49, 5
+; GFX908-NEXT: v_writelane_b32 v62, s50, 6
+; GFX908-NEXT: v_writelane_b32 v62, s51, 7
+; GFX908-NEXT: v_writelane_b32 v62, s52, 8
+; GFX908-NEXT: v_writelane_b32 v62, s53, 9
+; GFX908-NEXT: v_writelane_b32 v62, s62, 10
+; GFX908-NEXT: v_writelane_b32 v62, s63, 11
+; GFX908-NEXT: v_writelane_b32 v62, s64, 12
+; GFX908-NEXT: v_writelane_b32 v62, s65, 13
+; GFX908-NEXT: v_writelane_b32 v62, s66, 14
+; GFX908-NEXT: v_writelane_b32 v62, s67, 15
; GFX908-NEXT: v_mov_b32_e32 v33, v30
; GFX908-NEXT: v_mov_b32_e32 v34, v29
; GFX908-NEXT: v_mov_b32_e32 v35, v28
@@ -569,36 +489,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX908-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec
; GFX908-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; GFX908-NEXT: v_mov_b32_e32 v0, 0
-; GFX908-NEXT: v_readlane_b32 s67, v62, 31
-; GFX908-NEXT: v_readlane_b32 s66, v62, 30
-; GFX908-NEXT: v_readlane_b32 s65, v62, 29
-; GFX908-NEXT: v_readlane_b32 s64, v62, 28
-; GFX908-NEXT: v_readlane_b32 s63, v62, 27
-; GFX908-NEXT: v_readlane_b32 s62, v62, 26
-; GFX908-NEXT: v_readlane_b32 s61, v62, 25
-; GFX908-NEXT: v_readlane_b32 s60, v62, 24
-; GFX908-NEXT: v_readlane_b32 s59, v62, 23
-; GFX908-NEXT: v_readlane_b32 s58, v62, 22
-; GFX908-NEXT: v_readlane_b32 s57, v62, 21
-; GFX908-NEXT: v_readlane_b32 s56, v62, 20
-; GFX908-NEXT: v_readlane_b32 s55, v62, 19
-; GFX908-NEXT: v_readlane_b32 s54, v62, 18
-; GFX908-NEXT: v_readlane_b32 s53, v62, 17
-; GFX908-NEXT: v_readlane_b32 s52, v62, 16
-; GFX908-NEXT: v_readlane_b32 s51, v62, 15
-; GFX908-NEXT: v_readlane_b32 s50, v62, 14
-; GFX908-NEXT: v_readlane_b32 s49, v62, 13
-; GFX908-NEXT: v_readlane_b32 s48, v62, 12
-; GFX908-NEXT: v_readlane_b32 s47, v62, 11
-; GFX908-NEXT: v_readlane_b32 s46, v62, 10
-; GFX908-NEXT: v_readlane_b32 s45, v62, 9
-; GFX908-NEXT: v_readlane_b32 s44, v62, 8
-; GFX908-NEXT: v_readlane_b32 s43, v62, 7
-; GFX908-NEXT: v_readlane_b32 s42, v62, 6
-; GFX908-NEXT: v_readlane_b32 s41, v62, 5
-; GFX908-NEXT: v_readlane_b32 s40, v62, 4
-; GFX908-NEXT: v_readlane_b32 s39, v62, 3
-; GFX908-NEXT: v_readlane_b32 s38, v62, 2
+; GFX908-NEXT: v_readlane_b32 s67, v62, 15
+; GFX908-NEXT: v_readlane_b32 s66, v62, 14
+; GFX908-NEXT: v_readlane_b32 s65, v62, 13
+; GFX908-NEXT: v_readlane_b32 s64, v62, 12
+; GFX908-NEXT: v_readlane_b32 s63, v62, 11
+; GFX908-NEXT: v_readlane_b32 s62, v62, 10
+; GFX908-NEXT: v_readlane_b32 s53, v62, 9
+; GFX908-NEXT: v_readlane_b32 s52, v62, 8
+; GFX908-NEXT: v_readlane_b32 s51, v62, 7
+; GFX908-NEXT: v_readlane_b32 s50, v62, 6
+; GFX908-NEXT: v_readlane_b32 s49, v62, 5
+; GFX908-NEXT: v_readlane_b32 s48, v62, 4
+; GFX908-NEXT: v_readlane_b32 s47, v62, 3
+; GFX908-NEXT: v_readlane_b32 s46, v62, 2
; GFX908-NEXT: v_readlane_b32 s37, v62, 1
; GFX908-NEXT: v_readlane_b32 s36, v62, 0
; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
@@ -644,36 +548,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX90a-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
; GFX90a-NEXT: v_writelane_b32 v63, s36, 0
; GFX90a-NEXT: v_writelane_b32 v63, s37, 1
-; GFX90a-NEXT: v_writelane_b32 v63, s38, 2
-; GFX90a-NEXT: v_writelane_b32 v63, s39, 3
-; GFX90a-NEXT: v_writelane_b32 v63, s40, 4
-; GFX90a-NEXT: v_writelane_b32 v63, s41, 5
-; GFX90a-NEXT: v_writelane_b32 v63, s42, 6
-; GFX90a-NEXT: v_writelane_b32 v63, s43, 7
-; GFX90a-NEXT: v_writelane_b32 v63, s44, 8
-; GFX90a-NEXT: v_writelane_b32 v63, s45, 9
-; GFX90a-NEXT: v_writelane_b32 v63, s46, 10
-; GFX90a-NEXT: v_writelane_b32 v63, s47, 11
-; GFX90a-NEXT: v_writelane_b32 v63, s48, 12
-; GFX90a-NEXT: v_writelane_b32 v63, s49, 13
-; GFX90a-NEXT: v_writelane_b32 v63, s50, 14
-; GFX90a-NEXT: v_writelane_b32 v63, s51, 15
-; GFX90a-NEXT: v_writelane_b32 v63, s52, 16
-; GFX90a-NEXT: v_writelane_b32 v63, s53, 17
-; GFX90a-NEXT: v_writelane_b32 v63, s54, 18
-; GFX90a-NEXT: v_writelane_b32 v63, s55, 19
-; GFX90a-NEXT: v_writelane_b32 v63, s56, 20
-; GFX90a-NEXT: v_writelane_b32 v63, s57, 21
-; GFX90a-NEXT: v_writelane_b32 v63, s58, 22
-; GFX90a-NEXT: v_writelane_b32 v63, s59, 23
-; GFX90a-NEXT: v_writelane_b32 v63, s60, 24
-; GFX90a-NEXT: v_writelane_b32 v63, s61, 25
-; GFX90a-NEXT: v_writelane_b32 v63, s62, 26
-; GFX90a-NEXT: v_writelane_b32 v63, s63, 27
-; GFX90a-NEXT: v_writelane_b32 v63, s64, 28
-; GFX90a-NEXT: v_writelane_b32 v63, s65, 29
-; GFX90a-NEXT: v_writelane_b32 v63, s66, 30
-; GFX90a-NEXT: v_writelane_b32 v63, s67, 31
+; GFX90a-NEXT: v_writelane_b32 v63, s46, 2
+; GFX90a-NEXT: v_writelane_b32 v63, s47, 3
+; GFX90a-NEXT: v_writelane_b32 v63, s48, 4
+; GFX90a-NEXT: v_writelane_b32 v63, s49, 5
+; GFX90a-NEXT: v_writelane_b32 v63, s50, 6
+; GFX90a-NEXT: v_writelane_b32 v63, s51, 7
+; GFX90a-NEXT: v_writelane_b32 v63, s52, 8
+; GFX90a-NEXT: v_writelane_b32 v63, s53, 9
+; GFX90a-NEXT: v_writelane_b32 v63, s62, 10
+; GFX90a-NEXT: v_writelane_b32 v63, s63, 11
+; GFX90a-NEXT: v_writelane_b32 v63, s64, 12
+; GFX90a-NEXT: v_writelane_b32 v63, s65, 13
+; GFX90a-NEXT: v_writelane_b32 v63, s66, 14
+; GFX90a-NEXT: v_writelane_b32 v63, s67, 15
; GFX90a-NEXT: v_mov_b32_e32 v33, v30
; GFX90a-NEXT: v_mov_b32_e32 v34, v29
; GFX90a-NEXT: v_mov_b32_e32 v35, v28
@@ -771,36 +659,20 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX90a-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec
; GFX90a-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; GFX90a-NEXT: v_mov_b32_e32 v0, 0
-; GFX90a-NEXT: v_readlane_b32 s67, v63, 31
-; GFX90a-NEXT: v_readlane_b32 s66, v63, 30
-; GFX90a-NEXT: v_readlane_b32 s65, v63, 29
-; GFX90a-NEXT: v_readlane_b32 s64, v63, 28
-; GFX90a-NEXT: v_readlane_b32 s63, v63, 27
-; GFX90a-NEXT: v_readlane_b32 s62, v63, 26
-; GFX90a-NEXT: v_readlane_b32 s61, v63, 25
-; GFX90a-NEXT: v_readlane_b32 s60, v63, 24
-; GFX90a-NEXT: v_readlane_b32 s59, v63, 23
-; GFX90a-NEXT: v_readlane_b32 s58, v63, 22
-; GFX90a-NEXT: v_readlane_b32 s57, v63, 21
-; GFX90a-NEXT: v_readlane_b32 s56, v63, 20
-; GFX90a-NEXT: v_readlane_b32 s55, v63, 19
-; GFX90a-NEXT: v_readlane_b32 s54, v63, 18
-; GFX90a-NEXT: v_readlane_b32 s53, v63, 17
-; GFX90a-NEXT: v_readlane_b32 s52, v63, 16
-; GFX90a-NEXT: v_readlane_b32 s51, v63, 15
-; GFX90a-NEXT: v_readlane_b32 s50, v63, 14
-; GFX90a-NEXT: v_readlane_b32 s49, v63, 13
-; GFX90a-NEXT: v_readlane_b32 s48, v63, 12
-; GFX90a-NEXT: v_readlane_b32 s47, v63, 11
-; GFX90a-NEXT: v_readlane_b32 s46, v63, 10
-; GFX90a-NEXT: v_readlane_b32 s45, v63, 9
-; GFX90a-NEXT: v_readlane_b32 s44, v63, 8
-; GFX90a-NEXT: v_readlane_b32 s43, v63, 7
-; GFX90a-NEXT: v_readlane_b32 s42, v63, 6
-; GFX90a-NEXT: v_readlane_b32 s41, v63, 5
-; GFX90a-NEXT: v_readlane_b32 s40, v63, 4
-; GFX90a-NEXT: v_readlane_b32 s39, v63, 3
-; GFX90a-NEXT: v_readlane_b32 s38, v63, 2
+; GFX90a-NEXT: v_readlane_b32 s67, v63, 15
+; GFX90a-NEXT: v_readlane_b32 s66, v63, 14
+; GFX90a-NEXT: v_readlane_b32 s65, v63, 13
+; GFX90a-NEXT: v_readlane_b32 s64, v63, 12
+; GFX90a-NEXT: v_readlane_b32 s63, v63, 11
+; GFX90a-NEXT: v_readlane_b32 s62, v63, 10
+; GFX90a-NEXT: v_readlane_b32 s53, v63, 9
+; GFX90a-NEXT: v_readlane_b32 s52, v63, 8
+; GFX90a-NEXT: v_readlane_b32 s51, v63, 7
+; GFX90a-NEXT: v_readlane_b32 s50, v63, 6
+; GFX90a-NEXT: v_readlane_b32 s49, v63, 5
+; GFX90a-NEXT: v_readlane_b32 s48, v63, 4
+; GFX90a-NEXT: v_readlane_b32 s47, v63, 3
+; GFX90a-NEXT: v_readlane_b32 s46, v63, 2
; GFX90a-NEXT: v_readlane_b32 s37, v63, 1
; GFX90a-NEXT: v_readlane_b32 s36, v63, 0
; GFX90a-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir b/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir
index 4d6e33cf0b68a..b427c5bdd7229 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir
@@ -1,8 +1,7 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck %s
# CHECK: csr_sgpr_spill
-# CHECK: spillPhysVGPRs
-# CHECK-NEXT: - '$vgpr0'
+# CHECK-NOT: spillPhysVGPRs
---
name: csr_sgpr_spill
tracksRegLiveness: true
>From 5478409c9853daef8d8e7aaee7d40655b8b76f04 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 18 Feb 2025 12:46:55 -0500
Subject: [PATCH 2/2] Start the partition from s40
---
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 14 +-
.../amdgpu-simplify-libcall-pow-codegen.ll | 60 +-
llvm/test/CodeGen/AMDGPU/bf16.ll | 168 +-
...der-no-live-segment-at-def-implicit-def.ll | 42 +-
.../branch-folding-implicit-def-subreg.ll | 191 +-
.../test/CodeGen/AMDGPU/branch-relax-spill.ll | 36 +-
llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 12 +-
.../CodeGen/AMDGPU/call-argument-types.ll | 2520 ++++++++---------
.../test/CodeGen/AMDGPU/callee-frame-setup.ll | 1426 +++++-----
.../AMDGPU/csr-sgpr-spill-live-ins.mir | 6 -
llvm/test/CodeGen/AMDGPU/ds_read2.ll | 36 +-
.../AMDGPU/dwarf-multi-register-use-crash.ll | 26 +-
.../eliminate-frame-index-s-mov-b32.mir | 44 +-
.../CodeGen/AMDGPU/function-args-inreg.ll | 4 +-
.../CodeGen/AMDGPU/function-resource-usage.ll | 12 +-
.../CodeGen/AMDGPU/gfx-call-non-gfx-func.ll | 128 +-
.../AMDGPU/gfx-callable-argument-types.ll | 64 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 1828 ++++++------
.../AMDGPU/global_atomics_scan_fmax.ll | 1276 ++++-----
.../AMDGPU/global_atomics_scan_fmin.ll | 1276 ++++-----
.../AMDGPU/global_atomics_scan_fsub.ll | 1828 ++++++------
.../greedy-alloc-fail-sgpr1024-spill.mir | 76 +-
.../identical-subrange-spill-infloop.ll | 48 +-
llvm/test/CodeGen/AMDGPU/indirect-call.ll | 320 +--
llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +-
.../llvm.amdgcn.pops.exiting.wave.id.ll | 48 +-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 12 +-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 36 +-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 12 +-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 36 +-
.../lower-work-group-id-intrinsics-hsa.ll | 64 +-
...ne-sink-temporal-divergence-swdev407790.ll | 144 +-
.../AMDGPU/materialize-frame-index-sgpr.ll | 342 ++-
...-knownbits-assign-crash-gh-issue-110930.ll | 10 +-
.../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 40 +-
.../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 28 +-
.../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 14 +-
.../AMDGPU/promote-constOffset-to-imm.ll | 446 +--
.../ran-out-of-sgprs-allocation-failure.mir | 119 +-
llvm/test/CodeGen/AMDGPU/select.f16.ll | 4 +-
.../sgpr-spill-update-only-slot-indexes.ll | 16 +-
.../AMDGPU/shufflevector.v2i64.v8i64.ll | 448 +--
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 72 +-
.../AMDGPU/snippet-copy-bundle-regression.mir | 58 +-
.../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 10 +-
.../AMDGPU/spill-sgpr-used-for-exec-copy.mir | 4 +-
.../spill_more_than_wavesize_csr_sgprs.ll | 64 +-
...tack-pointer-offset-relative-frameindex.ll | 22 +-
llvm/test/CodeGen/AMDGPU/stack-realign.ll | 418 +--
.../AMDGPU/tuple-allocation-failure.ll | 144 +-
.../unallocatable-bundle-regression.mir | 48 +-
.../AMDGPU/unstructured-cfg-def-use-issue.ll | 70 +-
.../CodeGen/AMDGPU/use_restore_frame_reg.mir | 44 +-
.../AMDGPU/vgpr-large-tuple-alloc-error.ll | 64 +-
54 files changed, 7102 insertions(+), 7178 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index e3861a7d06c3d..e891fdba4e03e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -91,11 +91,15 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs<
>;
def CSR_AMDGPU_SGPRs : CalleeSavedRegs<
- (add (sequence "SGPR%u", 30, 37),
- (sequence "SGPR%u", 46, 53),
- (sequence "SGPR%u", 62, 69),
- (sequence "SGPR%u", 78, 85),
- (sequence "SGPR%u", 94, 105))
+ // Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer),
+ // and s34 (base pointer) are callee-saved. The striped layout starts from s40,
+ // with a stripe width of 8. The last stripe is 10 wide instead of 8, to avoid
+ // ending with a 2-wide stripe.
+ (add (sequence "SGPR%u", 30, 39),
+ (sequence "SGPR%u", 48, 55),
+ (sequence "SGPR%u", 64, 71),
+ (sequence "SGPR%u", 80, 87),
+ (sequence "SGPR%u", 96, 105))
>;
def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs<
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
index 905d0deacab35..834dd4d6d0567 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
@@ -125,8 +125,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s46, 6
-; CHECK-NEXT: v_writelane_b32 v43, s47, 7
+; CHECK-NEXT: v_writelane_b32 v43, s38, 6
+; CHECK-NEXT: v_writelane_b32 v43, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -153,7 +153,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -163,7 +163,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -185,8 +185,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s47, v43, 7
-; CHECK-NEXT: v_readlane_b32 s46, v43, 6
+; CHECK-NEXT: v_readlane_b32 s39, v43, 7
+; CHECK-NEXT: v_readlane_b32 s38, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -266,8 +266,8 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s46, 6
-; CHECK-NEXT: v_writelane_b32 v43, s47, 7
+; CHECK-NEXT: v_writelane_b32 v43, s38, 6
+; CHECK-NEXT: v_writelane_b32 v43, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -293,7 +293,7 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1]
@@ -302,7 +302,7 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -321,8 +321,8 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s47, v43, 7
-; CHECK-NEXT: v_readlane_b32 s46, v43, 6
+; CHECK-NEXT: v_readlane_b32 s39, v43, 7
+; CHECK-NEXT: v_readlane_b32 s38, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -409,8 +409,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s46, 6
-; CHECK-NEXT: v_writelane_b32 v43, s47, 7
+; CHECK-NEXT: v_writelane_b32 v43, s38, 6
+; CHECK-NEXT: v_writelane_b32 v43, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -437,7 +437,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
@@ -447,7 +447,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -469,8 +469,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s47, v43, 7
-; CHECK-NEXT: v_readlane_b32 s46, v43, 6
+; CHECK-NEXT: v_readlane_b32 s39, v43, 7
+; CHECK-NEXT: v_readlane_b32 s38, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
@@ -552,8 +552,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: v_writelane_b32 v42, s35, 3
; CHECK-NEXT: v_writelane_b32 v42, s36, 4
; CHECK-NEXT: v_writelane_b32 v42, s37, 5
-; CHECK-NEXT: v_writelane_b32 v42, s46, 6
-; CHECK-NEXT: v_writelane_b32 v42, s47, 7
+; CHECK-NEXT: v_writelane_b32 v42, s38, 6
+; CHECK-NEXT: v_writelane_b32 v42, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: v_writelane_b32 v42, s48, 8
; CHECK-NEXT: v_writelane_b32 v42, s49, 9
@@ -577,7 +577,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -588,7 +588,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -606,8 +606,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: v_readlane_b32 s50, v42, 10
; CHECK-NEXT: v_readlane_b32 s49, v42, 9
; CHECK-NEXT: v_readlane_b32 s48, v42, 8
-; CHECK-NEXT: v_readlane_b32 s47, v42, 7
-; CHECK-NEXT: v_readlane_b32 s46, v42, 6
+; CHECK-NEXT: v_readlane_b32 s39, v42, 7
+; CHECK-NEXT: v_readlane_b32 s38, v42, 6
; CHECK-NEXT: v_readlane_b32 s37, v42, 5
; CHECK-NEXT: v_readlane_b32 s36, v42, 4
; CHECK-NEXT: v_readlane_b32 s35, v42, 3
@@ -694,8 +694,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: v_writelane_b32 v43, s35, 3
; CHECK-NEXT: v_writelane_b32 v43, s36, 4
; CHECK-NEXT: v_writelane_b32 v43, s37, 5
-; CHECK-NEXT: v_writelane_b32 v43, s46, 6
-; CHECK-NEXT: v_writelane_b32 v43, s47, 7
+; CHECK-NEXT: v_writelane_b32 v43, s38, 6
+; CHECK-NEXT: v_writelane_b32 v43, s39, 7
; CHECK-NEXT: s_addk_i32 s32, 0x800
; CHECK-NEXT: v_writelane_b32 v43, s48, 8
; CHECK-NEXT: v_writelane_b32 v43, s49, 9
@@ -721,7 +721,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_or_b32_e32 v42, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -732,7 +732,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -753,8 +753,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: v_readlane_b32 s50, v43, 10
; CHECK-NEXT: v_readlane_b32 s49, v43, 9
; CHECK-NEXT: v_readlane_b32 s48, v43, 8
-; CHECK-NEXT: v_readlane_b32 s47, v43, 7
-; CHECK-NEXT: v_readlane_b32 s46, v43, 6
+; CHECK-NEXT: v_readlane_b32 s39, v43, 7
+; CHECK-NEXT: v_readlane_b32 s38, v43, 6
; CHECK-NEXT: v_readlane_b32 s37, v43, 5
; CHECK-NEXT: v_readlane_b32 s36, v43, 4
; CHECK-NEXT: v_readlane_b32 s35, v43, 3
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index f36f5d5f00edc..eef02c5fe26ed 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -37552,21 +37552,21 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GCN-NEXT: v_and_b32_e32 v8, 1, v14
; GCN-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v7
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32
-; GCN-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v8
+; GCN-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v8
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:64
; GCN-NEXT: v_and_b32_e32 v9, 1, v15
-; GCN-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v9
+; GCN-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v9
; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:60
; GCN-NEXT: s_waitcnt vmcnt(2)
; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8
-; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[40:41]
+; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[42:43]
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:56
; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v30
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9
-; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[38:39]
+; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[40:41]
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52
; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v29
; GCN-NEXT: s_waitcnt vmcnt(1)
@@ -37811,9 +37811,9 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX8-NEXT: v_and_b32_e32 v0, 1, v13
; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v14
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v15
; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v15
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v22
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v30
; GFX8-NEXT: v_cndmask_b32_e64 v6, v1, v0, s[28:29]
@@ -37839,9 +37839,9 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX8-NEXT: v_or_b32_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v5, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[40:41]
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[42:43]
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v19
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v27
; GFX8-NEXT: v_cndmask_b32_e64 v3, v1, v0, s[16:17]
@@ -38896,49 +38896,51 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX8-NEXT: v_and_b32_e32 v0, 1, v13
; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v14
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v15
; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v16
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v15
; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v17
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v16
; GFX8-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v17
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v18
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v19
; GFX8-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v20
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v19
; GFX8-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v21
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v20
; GFX8-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v21
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v22
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v23
; GFX8-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v24
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v23
; GFX8-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v25
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v24
; GFX8-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v26
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v27
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v25
; GFX8-NEXT: v_writelane_b32 v34, s30, 0
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v28
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v26
; GFX8-NEXT: v_writelane_b32 v34, s31, 1
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v29
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v27
; GFX8-NEXT: v_writelane_b32 v34, s34, 2
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v30
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v28
; GFX8-NEXT: v_writelane_b32 v34, s35, 3
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
-; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v29
; GFX8-NEXT: v_writelane_b32 v34, s36, 4
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 1, v30
; GFX8-NEXT: v_writelane_b32 v34, s37, 5
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0
+; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32
+; GFX8-NEXT: v_writelane_b32 v34, s38, 6
+; GFX8-NEXT: v_writelane_b32 v34, s39, 7
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0
+; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72
@@ -38975,40 +38977,40 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v29
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v28, 16, v32
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[36:37]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[36:37]
; GFX8-NEXT: v_lshrrev_b32_e32 v32, 16, v31
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v30
-; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[30:31]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[90:91]
+; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[34:35]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[30:31]
; GFX8-NEXT: v_lshrrev_b32_e32 v31, 16, v27
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v26
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[88:89]
-; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[90:91]
+; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[88:89]
; GFX8-NEXT: v_lshrrev_b32_e32 v27, 16, v25
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v24
-; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[76:77]
-; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75]
+; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[78:79]
+; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[76:77]
; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v23
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v22
-; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[72:73]
-; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71]
+; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[74:75]
+; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[72:73]
; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v21
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v20
-; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[60:61]
-; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59]
+; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[62:63]
+; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[60:61]
; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v19
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v18
-; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[56:57]
-; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55]
+; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[58:59]
+; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[56:57]
; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v17
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v16
-; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[44:45]
-; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43]
+; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[46:47]
+; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[44:45]
; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v15
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v14
-; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[40:41]
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[42:43]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[40:41]
; GFX8-NEXT: v_lshrrev_b32_e32 v15, 16, v13
; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v12
; GFX8-NEXT: v_cndmask_b32_e64 v15, v33, v15, s[28:29]
@@ -39069,6 +39071,8 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX8-NEXT: v_or_b32_sdwa v13, v26, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v14, v30, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v15, v29, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_readlane_b32 s39, v34, 7
+; GFX8-NEXT: v_readlane_b32 s38, v34, 6
; GFX8-NEXT: v_readlane_b32 s37, v34, 5
; GFX8-NEXT: v_readlane_b32 s36, v34, 4
; GFX8-NEXT: v_readlane_b32 s35, v34, 3
@@ -39114,37 +39118,37 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX9-NEXT: v_and_b32_e32 v0, 1, v12
; GFX9-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v15
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v14
; GFX9-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v17
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v14
; GFX9-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v16
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v17
; GFX9-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v16
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v19
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v18
; GFX9-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v21
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v18
; GFX9-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v20
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v21
; GFX9-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v20
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v23
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v22
; GFX9-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v25
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v22
; GFX9-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v24
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v25
; GFX9-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v24
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v27
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v26
; GFX9-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v29
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v26
; GFX9-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v28
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v29
; GFX9-NEXT: v_cmp_eq_u32_e64 s[92:93], 1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 1, v28
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[94:95], 1, v0
; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32
; GFX9-NEXT: v_writelane_b32 v33, s30, 0
; GFX9-NEXT: v_writelane_b32 v33, s31, 1
@@ -39194,38 +39198,38 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX9-NEXT: v_lshrrev_b32_e32 v32, 16, v32
; GFX9-NEXT: v_lshrrev_b32_e32 v31, 16, v31
; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[30:31]
-; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[92:93]
+; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[94:95]
; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v30
; GFX9-NEXT: v_lshrrev_b32_e32 v28, 16, v28
-; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[90:91]
-; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[88:89]
+; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[92:93]
+; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[90:91]
; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v27
; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v26
-; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87]
-; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[76:77]
+; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[88:89]
+; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[78:79]
; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v25
; GFX9-NEXT: v_lshrrev_b32_e32 v24, 16, v24
-; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75]
-; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[72:73]
+; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[76:77]
+; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[74:75]
; GFX9-NEXT: v_lshrrev_b32_e32 v23, 16, v23
; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v22
-; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71]
-; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[60:61]
+; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[72:73]
+; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[62:63]
; GFX9-NEXT: v_lshrrev_b32_e32 v21, 16, v21
; GFX9-NEXT: v_lshrrev_b32_e32 v20, 16, v20
-; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59]
-; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[56:57]
+; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[60:61]
+; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[58:59]
; GFX9-NEXT: v_lshrrev_b32_e32 v19, 16, v19
; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v18
-; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55]
-; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[44:45]
+; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[56:57]
+; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[46:47]
; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v17
; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43]
-; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[40:41]
+; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[44:45]
+; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[42:43]
; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15
; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39]
+; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[40:41]
; GFX9-NEXT: v_cndmask_b32_e64 v15, v12, v13, s[28:29]
; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index 85b9adfe6ea5c..36fa7b97b3c77 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -9,24 +9,24 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0
+; CHECK-NEXT: s_load_dwordx8 s[48:55], s[8:9], 0x0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: s_mov_b32 s12, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_cmp_lg_u32 s100, 0
+; CHECK-NEXT: s_cmp_lg_u32 s52, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
-; CHECK-NEXT: s_cmp_eq_u32 s102, 0
+; CHECK-NEXT: s_cmp_eq_u32 s54, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
-; CHECK-NEXT: s_cmp_lg_u32 s103, 0
+; CHECK-NEXT: s_cmp_lg_u32 s55, 0
; CHECK-NEXT: s_mov_b32 s17, 0
; CHECK-NEXT: s_cselect_b32 s12, -1, 0
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: s_mov_b32 s96, 0
+; CHECK-NEXT: s_mov_b32 s48, 0
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_6
; CHECK-NEXT: s_branch .LBB0_7
@@ -34,16 +34,16 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s14, s12
; CHECK-NEXT: s_mov_b32 s15, s12
; CHECK-NEXT: s_mov_b32 s13, s12
-; CHECK-NEXT: s_mov_b64 s[98:99], s[14:15]
-; CHECK-NEXT: s_mov_b64 s[96:97], s[12:13]
+; CHECK-NEXT: s_mov_b64 s[50:51], s[14:15]
+; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
-; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s101, 0
-; CHECK-NEXT: s_mov_b32 s96, 1.0
+; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
+; CHECK-NEXT: s_mov_b32 s48, 1.0
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
-; CHECK-NEXT: s_mov_b32 s97, s96
-; CHECK-NEXT: s_mov_b32 s98, s96
-; CHECK-NEXT: s_mov_b32 s99, s96
+; CHECK-NEXT: s_mov_b32 s49, s48
+; CHECK-NEXT: s_mov_b32 s50, s48
+; CHECK-NEXT: s_mov_b32 s51, s48
; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
@@ -55,7 +55,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT: v_add_f32_e64 v1, s17, s96
+; CHECK-NEXT: v_add_f32_e64 v1, s17, s48
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT: s_mov_b32 s12, s14
@@ -65,13 +65,13 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: s_mov_b32 s13, s15
; CHECK-NEXT: s_mov_b32 s14, s16
-; CHECK-NEXT: s_mov_b32 s96, 0
+; CHECK-NEXT: s_mov_b32 s48, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
-; CHECK-NEXT: s_mov_b32 s97, s96
-; CHECK-NEXT: s_mov_b32 s98, s96
-; CHECK-NEXT: s_mov_b32 s99, s96
+; CHECK-NEXT: s_mov_b32 s49, s48
+; CHECK-NEXT: s_mov_b32 s50, s48
+; CHECK-NEXT: s_mov_b32 s51, s48
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
@@ -80,11 +80,11 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
-; CHECK-NEXT: v_mov_b32_e32 v0, s96
+; CHECK-NEXT: v_mov_b32_e32 v0, s48
; CHECK-NEXT: v_mov_b32_e32 v4, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, s97
-; CHECK-NEXT: v_mov_b32_e32 v2, s98
-; CHECK-NEXT: v_mov_b32_e32 v3, s99
+; CHECK-NEXT: v_mov_b32_e32 v1, s49
+; CHECK-NEXT: v_mov_b32_e32 v2, s50
+; CHECK-NEXT: v_mov_b32_e32 v3, s51
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; CHECK-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index bce02a4cfacde..e43a021802644 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -79,16 +79,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -111,7 +111,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr17, implicit $exec
; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr17, implicit $exec
@@ -124,7 +124,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.7.Flow19:
; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -132,7 +132,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.8.Flow32:
; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr18_sgpr19, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -141,7 +141,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.9.bb89:
; GFX90A-NEXT: successors: %bb.10(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -149,16 +149,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.10.Flow33:
; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
- ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.11.bb84:
; GFX90A-NEXT: successors: %bb.12(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -166,16 +166,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.12.Flow34:
; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
- ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.13.bb79:
; GFX90A-NEXT: successors: %bb.14(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -183,16 +183,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.14.Flow35:
; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
- ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.15.bb72:
; GFX90A-NEXT: successors: %bb.16(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc
@@ -206,7 +206,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.16.Flow36:
; GFX90A-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr66_sgpr67, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -215,7 +215,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.17.bb67:
; GFX90A-NEXT: successors: %bb.18(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -223,7 +223,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.18.Flow37:
; GFX90A-NEXT: successors: %bb.19(0x40000000), %bb.20(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr64_sgpr65, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -232,7 +232,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.19.bb62:
; GFX90A-NEXT: successors: %bb.20(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -240,16 +240,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.20.Flow38:
; GFX90A-NEXT: successors: %bb.21(0x40000000), %bb.22(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr62_sgpr63, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr54_sgpr55, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.22, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.21.bb54:
; GFX90A-NEXT: successors: %bb.22(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -257,7 +257,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.22.Flow39:
; GFX90A-NEXT: successors: %bb.23(0x40000000), %bb.24(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -266,7 +266,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.23.bb47:
; GFX90A-NEXT: successors: %bb.24(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -274,7 +274,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.24.Flow40:
; GFX90A-NEXT: successors: %bb.25(0x40000000), %bb.26(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -283,7 +283,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.25.bb40:
; GFX90A-NEXT: successors: %bb.26(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -291,7 +291,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.26.Flow41:
; GFX90A-NEXT: successors: %bb.27(0x40000000), %bb.28(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -300,7 +300,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.27.bb33:
; GFX90A-NEXT: successors: %bb.28(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -308,10 +308,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.28.Flow42:
; GFX90A-NEXT: successors: %bb.34(0x40000000), %bb.29(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
- ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.34, implicit $exec
; GFX90A-NEXT: {{ $}}
@@ -365,13 +365,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec :: (load (s8) from %ir.i21, addrspace 1)
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr43, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
@@ -400,22 +400,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.36.Flow21:
; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.6
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.37.bb27:
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr42_sgpr43, $sgpr64_sgpr65, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49, $sgpr50_sgpr51
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr46_sgpr47, $sgpr42_sgpr43, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec :: (load (s8) from %ir.i28, addrspace 1)
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = COPY renamable $sgpr36_sgpr37
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr45, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
@@ -440,34 +440,34 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.38.Flow22:
; GFX90A-NEXT: successors: %bb.36(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr40_sgpr41, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_ANDN2_B64 killed renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_OR_B64 killed renamable $sgpr36_sgpr37, killed renamable $sgpr44_sgpr45, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.36
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.39.bb34:
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr46_sgpr47, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec :: (load (s8) from %ir.i35, addrspace 1)
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = COPY renamable $sgpr36_sgpr37
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
@@ -495,23 +495,23 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.38
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.41.bb41:
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr60_sgpr61, $sgpr66_sgpr67
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
@@ -519,7 +519,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec :: (load (s8) from %ir.i42, addrspace 1)
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr18, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
@@ -542,22 +542,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.42.Flow24:
; GFX90A-NEXT: successors: %bb.40(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr18, implicit $exec
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.40
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.43.bb55:
@@ -596,29 +596,30 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.47(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.47
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.46.bb48:
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr60_sgpr61, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr1, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec :: (load (s8) from %ir.i49, addrspace 1)
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 -1
+ ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec
@@ -642,20 +643,20 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.47.Flow25:
; GFX90A-NEXT: successors: %bb.42(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc
- ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr52_sgpr53, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc
; GFX90A-NEXT: S_BRANCH %bb.42
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.48.bb63:
@@ -790,16 +791,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr17, implicit $exec
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF
@@ -883,7 +884,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.62.bb140:
; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -891,14 +892,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.63.Flow13:
; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.64.bb159:
; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -907,21 +908,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.65.Flow10:
; GFX90A-NEXT: successors: %bb.66(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.66.Flow14:
; GFX90A-NEXT: successors: %bb.8(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec
; GFX90A-NEXT: S_BRANCH %bb.8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.67.bb161:
; GFX90A-NEXT: successors: %bb.65(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr23, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr25, implicit $exec
@@ -940,7 +941,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.68.bb174:
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec
; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec
@@ -956,14 +957,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.69.Flow:
; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.70.bb186:
; GFX90A-NEXT: successors: %bb.71(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr27, implicit $exec
@@ -992,14 +993,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.71.Flow9:
; GFX90A-NEXT: successors: %bb.63(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0
; GFX90A-NEXT: S_BRANCH %bb.63
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.72.bb196:
; GFX90A-NEXT: successors: %bb.69(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec
; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
index 7c0c433ac3c51..83ab6c32aee96 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -909,39 +909,39 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v0, s35, 4
; CHECK-NEXT: v_writelane_b32 v0, s36, 5
; CHECK-NEXT: v_writelane_b32 v0, s37, 6
-; CHECK-NEXT: v_writelane_b32 v0, s46, 7
-; CHECK-NEXT: v_writelane_b32 v0, s47, 8
+; CHECK-NEXT: v_writelane_b32 v0, s38, 7
+; CHECK-NEXT: v_writelane_b32 v0, s39, 8
; CHECK-NEXT: v_writelane_b32 v0, s48, 9
; CHECK-NEXT: v_writelane_b32 v0, s49, 10
; CHECK-NEXT: v_writelane_b32 v0, s50, 11
; CHECK-NEXT: v_writelane_b32 v0, s51, 12
; CHECK-NEXT: v_writelane_b32 v0, s52, 13
; CHECK-NEXT: v_writelane_b32 v0, s53, 14
-; CHECK-NEXT: v_writelane_b32 v0, s62, 15
-; CHECK-NEXT: v_writelane_b32 v0, s63, 16
+; CHECK-NEXT: v_writelane_b32 v0, s54, 15
+; CHECK-NEXT: v_writelane_b32 v0, s55, 16
; CHECK-NEXT: v_writelane_b32 v0, s64, 17
; CHECK-NEXT: v_writelane_b32 v0, s65, 18
; CHECK-NEXT: v_writelane_b32 v0, s66, 19
; CHECK-NEXT: v_writelane_b32 v0, s67, 20
; CHECK-NEXT: v_writelane_b32 v0, s68, 21
; CHECK-NEXT: v_writelane_b32 v0, s69, 22
-; CHECK-NEXT: v_writelane_b32 v0, s78, 23
-; CHECK-NEXT: v_writelane_b32 v0, s79, 24
+; CHECK-NEXT: v_writelane_b32 v0, s70, 23
+; CHECK-NEXT: v_writelane_b32 v0, s71, 24
; CHECK-NEXT: v_writelane_b32 v0, s80, 25
; CHECK-NEXT: v_writelane_b32 v0, s81, 26
; CHECK-NEXT: v_writelane_b32 v0, s82, 27
; CHECK-NEXT: v_writelane_b32 v0, s83, 28
; CHECK-NEXT: v_writelane_b32 v0, s84, 29
; CHECK-NEXT: v_writelane_b32 v0, s85, 30
-; CHECK-NEXT: v_writelane_b32 v0, s94, 31
-; CHECK-NEXT: v_writelane_b32 v0, s95, 32
+; CHECK-NEXT: v_writelane_b32 v0, s86, 31
+; CHECK-NEXT: v_writelane_b32 v0, s87, 32
; CHECK-NEXT: v_writelane_b32 v0, s96, 33
; CHECK-NEXT: v_writelane_b32 v0, s97, 34
; CHECK-NEXT: v_writelane_b32 v0, s98, 35
; CHECK-NEXT: v_writelane_b32 v0, s99, 36
-; CHECK-NEXT: s_mov_b32 s38, s12
+; CHECK-NEXT: s_mov_b32 s40, s12
; CHECK-NEXT: v_writelane_b32 v0, s100, 37
-; CHECK-NEXT: s_cmp_eq_u32 s38, 0
+; CHECK-NEXT: s_cmp_eq_u32 s40, 0
; CHECK-NEXT: v_writelane_b32 v0, s101, 38
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: s_mov_b32 s0, 0
@@ -1602,32 +1602,32 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s98, v0, 35
; CHECK-NEXT: v_readlane_b32 s97, v0, 34
; CHECK-NEXT: v_readlane_b32 s96, v0, 33
-; CHECK-NEXT: v_readlane_b32 s95, v0, 32
-; CHECK-NEXT: v_readlane_b32 s94, v0, 31
+; CHECK-NEXT: v_readlane_b32 s87, v0, 32
+; CHECK-NEXT: v_readlane_b32 s86, v0, 31
; CHECK-NEXT: v_readlane_b32 s85, v0, 30
; CHECK-NEXT: v_readlane_b32 s84, v0, 29
; CHECK-NEXT: v_readlane_b32 s83, v0, 28
; CHECK-NEXT: v_readlane_b32 s82, v0, 27
; CHECK-NEXT: v_readlane_b32 s81, v0, 26
; CHECK-NEXT: v_readlane_b32 s80, v0, 25
-; CHECK-NEXT: v_readlane_b32 s79, v0, 24
-; CHECK-NEXT: v_readlane_b32 s78, v0, 23
+; CHECK-NEXT: v_readlane_b32 s71, v0, 24
+; CHECK-NEXT: v_readlane_b32 s70, v0, 23
; CHECK-NEXT: v_readlane_b32 s69, v0, 22
; CHECK-NEXT: v_readlane_b32 s68, v0, 21
; CHECK-NEXT: v_readlane_b32 s67, v0, 20
; CHECK-NEXT: v_readlane_b32 s66, v0, 19
; CHECK-NEXT: v_readlane_b32 s65, v0, 18
; CHECK-NEXT: v_readlane_b32 s64, v0, 17
-; CHECK-NEXT: v_readlane_b32 s63, v0, 16
-; CHECK-NEXT: v_readlane_b32 s62, v0, 15
+; CHECK-NEXT: v_readlane_b32 s55, v0, 16
+; CHECK-NEXT: v_readlane_b32 s54, v0, 15
; CHECK-NEXT: v_readlane_b32 s53, v0, 14
; CHECK-NEXT: v_readlane_b32 s52, v0, 13
; CHECK-NEXT: v_readlane_b32 s51, v0, 12
; CHECK-NEXT: v_readlane_b32 s50, v0, 11
; CHECK-NEXT: v_readlane_b32 s49, v0, 10
; CHECK-NEXT: v_readlane_b32 s48, v0, 9
-; CHECK-NEXT: v_readlane_b32 s47, v0, 8
-; CHECK-NEXT: v_readlane_b32 s46, v0, 7
+; CHECK-NEXT: v_readlane_b32 s39, v0, 8
+; CHECK-NEXT: v_readlane_b32 s38, v0, 7
; CHECK-NEXT: v_readlane_b32 s37, v0, 6
; CHECK-NEXT: v_readlane_b32 s36, v0, 5
; CHECK-NEXT: v_readlane_b32 s35, v0, 4
diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
index 394c32c8e4bcf..c10cb0ae6d336 100644
--- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll
@@ -1385,15 +1385,15 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s29, s33
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[38:39]
+; GFX9-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-NEXT: v_writelane_b32 v40, s29, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: s_getpc_b64 s[38:39]
-; GFX9-NEXT: s_add_u32 s38, s38, external_void_func_a15i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s39, s39, external_void_func_a15i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[40:41]
+; GFX9-NEXT: s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12
; GFX9-NEXT: s_mov_b32 s3, s19
; GFX9-NEXT: s_mov_b32 s2, s18
; GFX9-NEXT: s_mov_b32 s1, s17
@@ -1408,7 +1408,7 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #
; GFX9-NEXT: s_mov_b32 s23, s27
; GFX9-NEXT: s_mov_b32 s24, s28
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[38:39]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[40:41]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_mov_b32 s32, s33
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 48f32a87203a3..2365c68a7cb0b 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -63,18 +63,18 @@ declare hidden void @external_void_func_v16i8(<16 x i8>) #0
define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; VI-LABEL: test_call_external_void_func_i1_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -82,18 +82,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_i1_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -101,18 +101,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_i1_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -155,18 +155,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: v_bfe_i32 v0, v0, 0, 1
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -178,18 +178,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: v_bfe_i32 v0, v0, 0, 1
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -201,18 +201,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -265,18 +265,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: v_and_b32_e32 v0, 1, v0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -288,18 +288,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: v_and_b32_e32 v0, 1, v0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -311,18 +311,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -370,18 +370,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; VI-LABEL: test_call_external_void_func_i8_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x7b
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -389,18 +389,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_i8_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 0x7b
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -408,18 +408,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_i8_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -463,18 +463,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -485,18 +485,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -507,18 +507,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -567,18 +567,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -589,18 +589,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -611,18 +611,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -667,18 +667,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; VI-LABEL: test_call_external_void_func_i16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x7b
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -686,18 +686,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_i16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 0x7b
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -705,18 +705,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_i16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -759,18 +759,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -781,18 +781,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -803,18 +803,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -863,18 +863,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
@@ -885,18 +885,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
@@ -907,18 +907,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -963,18 +963,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; VI-LABEL: test_call_external_void_func_i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 42
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -982,18 +982,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 42
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1001,18 +1001,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1051,18 +1051,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; VI-LABEL: test_call_external_void_func_i64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x7b
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_mov_b32 s32, 0
@@ -1071,18 +1071,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_i64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 0x7b
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 s32, 0
@@ -1091,18 +1091,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_i64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -1143,69 +1143,69 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; VI-LABEL: test_call_external_void_func_v2i64:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_mov_b32 s0, 0
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2i64:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_mov_b32 s0, 0
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v2i64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -1252,18 +1252,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2i64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -1274,18 +1274,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2i64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -1296,18 +1296,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -1353,23 +1353,23 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; VI-LABEL: test_call_external_void_func_v3i64:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_mov_b32 s0, 0
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v4, 1
; VI-NEXT: v_mov_b32_e32 v5, 2
; VI-NEXT: s_mov_b32 s32, 0
@@ -1378,23 +1378,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
;
; CI-LABEL: test_call_external_void_func_v3i64:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_mov_b32 s0, 0
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v4, 1
; CI-NEXT: v_mov_b32_e32 v5, 2
; CI-NEXT: s_mov_b32 s32, 0
@@ -1403,23 +1403,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v4, 1
; GFX9-NEXT: v_mov_b32_e32 v5, 2
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -1473,23 +1473,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; VI-LABEL: test_call_external_void_func_v4i64:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_mov_b32 s0, 0
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s1, s0
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v4, 1
; VI-NEXT: v_mov_b32_e32 v5, 2
; VI-NEXT: v_mov_b32_e32 v6, 3
@@ -1500,23 +1500,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
;
; CI-LABEL: test_call_external_void_func_v4i64:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_mov_b32 s0, 0
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_mov_b32 s1, s0
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v4, 1
; CI-NEXT: v_mov_b32_e32 v5, 2
; CI-NEXT: v_mov_b32_e32 v6, 3
@@ -1527,23 +1527,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i64:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_mov_b32 s0, 0
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v4, 1
; GFX9-NEXT: v_mov_b32_e32 v5, 2
; GFX9-NEXT: v_mov_b32_e32 v6, 3
@@ -1601,18 +1601,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; VI-LABEL: test_call_external_void_func_f16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x4400
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1620,18 +1620,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_f16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 4.0
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1639,18 +1639,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_f16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1689,18 +1689,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 4.0
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1708,18 +1708,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 4.0
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1727,18 +1727,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
@@ -1777,18 +1777,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1.0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: s_mov_b32 s32, 0
@@ -1797,18 +1797,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: s_mov_b32 s32, 0
@@ -1817,18 +1817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -1869,18 +1869,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1.0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1890,18 +1890,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1911,18 +1911,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1966,18 +1966,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v5f32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1.0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -1989,18 +1989,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v5f32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -2012,18 +2012,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
@@ -2072,18 +2072,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; VI-LABEL: test_call_external_void_func_f64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 0x40100000
; VI-NEXT: s_mov_b32 s32, 0
@@ -2092,18 +2092,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_f64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 0x40100000
; CI-NEXT: s_mov_b32 s32, 0
@@ -2112,18 +2112,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_f64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -2164,18 +2164,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2f64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 0
@@ -2186,18 +2186,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2f64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 0
@@ -2208,18 +2208,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
@@ -2265,18 +2265,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3f64_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0
; VI-NEXT: v_mov_b32_e32 v1, 2.0
; VI-NEXT: v_mov_b32_e32 v2, 0
@@ -2289,18 +2289,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3f64_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 0
@@ -2313,18 +2313,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
@@ -2375,42 +2375,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; VI-LABEL: test_call_external_void_func_v2i16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2i16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
@@ -2419,21 +2419,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2476,42 +2476,42 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; VI-LABEL: test_call_external_void_func_v3i16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16 at rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16 at rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v3i16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16 at rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16 at rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16
@@ -2522,21 +2522,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2579,42 +2579,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; VI-LABEL: test_call_external_void_func_v3f16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v3f16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
@@ -2626,21 +2626,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2683,18 +2683,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3i16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x20001
; VI-NEXT: v_mov_b32_e32 v1, 3
; VI-NEXT: s_mov_b32 s32, 0
@@ -2703,18 +2703,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3i16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -2724,18 +2724,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT: v_mov_b32_e32 v1, 3
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -2776,18 +2776,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
; VI-LABEL: test_call_external_void_func_v3f16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00
; VI-NEXT: v_mov_b32_e32 v1, 0x4400
; VI-NEXT: s_mov_b32 s32, 0
@@ -2796,18 +2796,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v3f16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1.0
; CI-NEXT: v_mov_b32_e32 v1, 2.0
; CI-NEXT: v_mov_b32_e32 v2, 4.0
@@ -2817,18 +2817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -2870,42 +2870,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; VI-LABEL: test_call_external_void_func_v4i16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v4i16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0
@@ -2917,21 +2917,21 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -2974,18 +2974,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; VI-LABEL: test_call_external_void_func_v4i16_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 0x20001
; VI-NEXT: v_mov_b32_e32 v1, 0x40003
; VI-NEXT: s_mov_b32 s32, 0
@@ -2994,18 +2994,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v4i16_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -3016,18 +3016,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -3069,42 +3069,42 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; VI-LABEL: test_call_external_void_func_v2f16:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2f16:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_cvt_f32_f16_e32 v0, v1
@@ -3115,21 +3115,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2f16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3172,63 +3172,63 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; VI-LABEL: test_call_external_void_func_v2i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v2i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v2i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32 at rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32 at rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3271,18 +3271,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v2i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: s_mov_b32 s32, 0
@@ -3291,18 +3291,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v2i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: s_mov_b32 s32, 0
@@ -3311,18 +3311,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: s_mov_b32 s32, 0
@@ -3363,18 +3363,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; VI-LABEL: test_call_external_void_func_v3i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 3
; VI-NEXT: v_mov_b32_e32 v1, 4
; VI-NEXT: v_mov_b32_e32 v2, 5
@@ -3384,18 +3384,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_v3i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 3
; CI-NEXT: v_mov_b32_e32 v1, 4
; CI-NEXT: v_mov_b32_e32 v2, 5
@@ -3405,18 +3405,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 3
; GFX9-NEXT: v_mov_b32_e32 v1, 4
; GFX9-NEXT: v_mov_b32_e32 v2, 5
@@ -3460,18 +3460,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; VI-LABEL: test_call_external_void_func_v3i32_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 3
; VI-NEXT: v_mov_b32_e32 v1, 4
; VI-NEXT: v_mov_b32_e32 v2, 5
@@ -3482,18 +3482,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
;
; CI-LABEL: test_call_external_void_func_v3i32_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 3
; CI-NEXT: v_mov_b32_e32 v1, 4
; CI-NEXT: v_mov_b32_e32 v2, 5
@@ -3504,18 +3504,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
;
; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 3
; GFX9-NEXT: v_mov_b32_e32 v1, 4
; GFX9-NEXT: v_mov_b32_e32 v2, 5
@@ -3561,63 +3561,63 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; VI-LABEL: test_call_external_void_func_v4i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v4i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v4i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3660,40 +3660,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v4i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
; VI-NEXT: v_mov_b32_e32 v3, 4
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VI-NEXT: s_endpgm
-;
-; CI-LABEL: test_call_external_void_func_v4i32_imm:
-; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_endpgm
+;
+; CI-LABEL: test_call_external_void_func_v4i32_imm:
+; CI: ; %bb.0:
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -3704,18 +3704,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -3761,18 +3761,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v5i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -3784,18 +3784,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v5i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -3807,18 +3807,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -3867,72 +3867,72 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; VI-LABEL: test_call_external_void_func_v8i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v8i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v8i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -3983,18 +3983,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; VI-LABEL: test_call_external_void_func_v8i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: v_mov_b32_e32 v0, 1
; VI-NEXT: v_mov_b32_e32 v1, 2
; VI-NEXT: v_mov_b32_e32 v2, 3
@@ -4009,18 +4009,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
;
; CI-LABEL: test_call_external_void_func_v8i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: v_mov_b32_e32 v0, 1
; CI-NEXT: v_mov_b32_e32 v1, 2
; CI-NEXT: v_mov_b32_e32 v2, 3
@@ -4035,18 +4035,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_mov_b32_e32 v2, 3
@@ -4102,13 +4102,13 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; VI-LABEL: test_call_external_void_func_v16i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
@@ -4116,25 +4116,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v16i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
@@ -4142,25 +4142,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v16i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -4168,12 +4168,12 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -4231,8 +4231,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
@@ -4242,19 +4242,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_getpc_b64 s[8:9]
; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_waitcnt vmcnt(7)
-; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32
+; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
; VI-NEXT: s_swappc_b64 s[30:31], s[8:9]
; VI-NEXT: s_endpgm
;
@@ -4263,8 +4263,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
@@ -4274,19 +4274,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_getpc_b64 s[8:9]
; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_waitcnt vmcnt(7)
-; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32
+; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
; CI-NEXT: s_swappc_b64 s[30:31], s[8:9]
; CI-NEXT: s_endpgm
;
@@ -4295,8 +4295,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
@@ -4306,19 +4306,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_getpc_b64 s[8:9]
; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_waitcnt vmcnt(7)
-; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32
+; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GFX9-NEXT: s_endpgm
;
@@ -4384,15 +4384,15 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; VI-LABEL: test_call_external_void_func_v32i32_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0
; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
@@ -4404,30 +4404,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_waitcnt vmcnt(8)
-; VI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4
+; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(8)
-; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32
+; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_v32i32_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0
; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
@@ -4439,30 +4439,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_waitcnt vmcnt(8)
-; CI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4
+; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(8)
-; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32
+; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0
; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
@@ -4474,16 +4474,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32
+; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
@@ -4557,89 +4557,89 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
; VI-LABEL: test_call_external_i32_func_i32_imm:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s66, -1
-; VI-NEXT: s_mov_b32 s67, 0xe80000
-; VI-NEXT: s_add_u32 s64, s64, s5
-; VI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24
-; VI-NEXT: s_addc_u32 s65, s65, 0
+; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s51, 0xe80000
+; VI-NEXT: s_add_u32 s48, s48, s5
+; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
+; VI-NEXT: s_addc_u32 s49, s49, 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[64:65]
+; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[66:67]
+; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
; VI-NEXT: v_mov_b32_e32 v0, 42
; VI-NEXT: s_mov_b32 s32, 0
-; VI-NEXT: s_mov_b32 s51, 0xf000
-; VI-NEXT: s_mov_b32 s50, -1
+; VI-NEXT: s_mov_b32 s39, 0xf000
+; VI-NEXT: s_mov_b32 s38, -1
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_i32_func_i32_imm:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s66, -1
-; CI-NEXT: s_mov_b32 s67, 0xe8f000
-; CI-NEXT: s_add_u32 s64, s64, s5
-; CI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x9
-; CI-NEXT: s_addc_u32 s65, s65, 0
+; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s51, 0xe8f000
+; CI-NEXT: s_add_u32 s48, s48, s5
+; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[64:65]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[66:67]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: v_mov_b32_e32 v0, 42
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: s_mov_b32 s51, 0xf000
-; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s39, 0xf000
+; CI-NEXT: s_mov_b32 s38, -1
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_i32_func_i32_imm:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s66, -1
-; GFX9-NEXT: s_mov_b32 s67, 0xe00000
-; GFX9-NEXT: s_add_u32 s64, s64, s5
-; GFX9-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24
-; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s5
+; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_mov_b32 s51, 0xf000
-; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xf000
+; GFX9-NEXT: s_mov_b32 s38, -1
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_call_external_i32_func_i32_imm:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b64 s[48:49], s[2:3], 0x24
+; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 42
; GFX11-NEXT: s_getpc_b64 s[2:3]
; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12
; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX11-NEXT: s_mov_b32 s32, 0
-; GFX11-NEXT: s_mov_b32 s51, 0x31016000
-; GFX11-NEXT: s_mov_b32 s50, -1
+; GFX11-NEXT: s_mov_b32 s39, 0x31016000
+; GFX11-NEXT: s_mov_b32 s38, -1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
-; GFX11-NEXT: buffer_store_b32 v0, off, s[48:51], 0 dlc
+; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
@@ -4647,7 +4647,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; HSA: ; %bb.0:
; HSA-NEXT: s_add_i32 s8, s8, s11
; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
-; HSA-NEXT: s_load_dwordx2 s[48:49], s[6:7], 0x0
+; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0
; HSA-NEXT: s_add_u32 s0, s0, s11
; HSA-NEXT: s_addc_u32 s1, s1, 0
; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9
@@ -4657,10 +4657,10 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; HSA-NEXT: s_mov_b64 s[6:7], s[4:5]
; HSA-NEXT: v_mov_b32_e32 v0, 42
; HSA-NEXT: s_mov_b32 s32, 0
-; HSA-NEXT: s_mov_b32 s51, 0x1100f000
-; HSA-NEXT: s_mov_b32 s50, -1
+; HSA-NEXT: s_mov_b32 s39, 0x1100f000
+; HSA-NEXT: s_mov_b32 s38, -1
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; HSA-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0
; HSA-NEXT: s_waitcnt vmcnt(0)
; HSA-NEXT: s_endpgm
%val = call i32 @external_i32_func_i32(i32 42)
@@ -4671,72 +4671,72 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; VI-LABEL: test_call_external_void_func_struct_i8_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_struct_i8_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
@@ -4787,86 +4787,86 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: v_mov_b32_e32 v0, 3
-; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0
+; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0
; VI-NEXT: v_mov_b32_e32 v0, 8
-; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
-; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
-; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_movk_i32 s32, 0x400
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
+; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32
+; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; VI-NEXT: s_endpgm
;
; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: v_mov_b32_e32 v0, 3
-; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0
+; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0
; CI-NEXT: v_mov_b32_e32 v0, 8
-; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
-; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
-; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_movk_i32 s32, 0x400
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32
+; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0
+; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_movk_i32 s32, 0x400
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32
+; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_endpgm
;
@@ -4923,33 +4923,33 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s5
-; VI-NEXT: s_addc_u32 s49, s49, 0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s5
+; VI-NEXT: s_addc_u32 s37, s37, 0
; VI-NEXT: v_mov_b32_e32 v0, 3
-; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0
+; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0
; VI-NEXT: v_mov_b32_e32 v0, 8
-; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
-; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
-; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0
; VI-NEXT: s_movk_i32 s32, 0x800
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
+; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32
+; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32
; VI-NEXT: v_mov_b32_e32 v0, 8
; VI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; VI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8
-; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12
+; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt vmcnt(1)
@@ -4961,33 +4961,33 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
;
; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s5
-; CI-NEXT: s_addc_u32 s49, s49, 0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s5
+; CI-NEXT: s_addc_u32 s37, s37, 0
; CI-NEXT: v_mov_b32_e32 v0, 3
-; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0
+; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0
; CI-NEXT: v_mov_b32_e32 v0, 8
-; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
-; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
-; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
+; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0
; CI-NEXT: s_movk_i32 s32, 0x800
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
; CI-NEXT: s_waitcnt vmcnt(1)
-; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32
+; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32
; CI-NEXT: v_mov_b32_e32 v0, 8
; CI-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; CI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8
-; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12
+; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt vmcnt(1)
@@ -4999,34 +4999,34 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
;
; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s5
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s5
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 3
-; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0
+; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0
; GFX9-NEXT: v_mov_b32_e32 v0, 8
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4
-; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4
+; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32
+; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32
; GFX9-NEXT: v_mov_b32_e32 v0, 8
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8
-; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12
+; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8
+; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt vmcnt(1)
@@ -5121,23 +5121,23 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; VI-LABEL: test_call_external_void_func_v16i8:
; VI: ; %bb.0:
-; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s50, -1
-; VI-NEXT: s_mov_b32 s51, 0xe80000
-; VI-NEXT: s_add_u32 s48, s48, s3
+; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s38, -1
+; VI-NEXT: s_mov_b32 s39, 0xe80000
+; VI-NEXT: s_add_u32 s36, s36, s3
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; VI-NEXT: s_addc_u32 s49, s49, 0
-; VI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; VI-NEXT: s_addc_u32 s37, s37, 0
+; VI-NEXT: s_mov_b64 s[0:1], s[36:37]
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; VI-NEXT: s_mov_b64 s[2:3], s[38:39]
; VI-NEXT: s_mov_b32 s32, 0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
@@ -5163,23 +5163,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
;
; CI-LABEL: test_call_external_void_func_v16i8:
; CI: ; %bb.0:
-; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s50, -1
-; CI-NEXT: s_mov_b32 s51, 0xe8f000
-; CI-NEXT: s_add_u32 s48, s48, s3
+; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s38, -1
+; CI-NEXT: s_mov_b32 s39, 0xe8f000
+; CI-NEXT: s_add_u32 s36, s36, s3
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: s_addc_u32 s49, s49, 0
-; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
+; CI-NEXT: s_addc_u32 s37, s37, 0
+; CI-NEXT: s_mov_b64 s[0:1], s[36:37]
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
+; CI-NEXT: s_mov_b64 s[2:3], s[38:39]
; CI-NEXT: s_mov_b32 s32, 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0
@@ -5205,23 +5205,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
;
; GFX9-LABEL: test_call_external_void_func_v16i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s3
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s3
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
@@ -5324,29 +5324,29 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
; VI-LABEL: stack_passed_arg_alignment_v32i32_f64:
; VI: ; %bb.0: ; %entry
-; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; VI-NEXT: s_mov_b32 s66, -1
-; VI-NEXT: s_mov_b32 s67, 0xe80000
-; VI-NEXT: s_add_u32 s64, s64, s5
+; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s54, -1
+; VI-NEXT: s_mov_b32 s55, 0xe80000
+; VI-NEXT: s_add_u32 s52, s52, s5
; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
; VI-NEXT: s_mov_b32 s32, 0
-; VI-NEXT: s_addc_u32 s65, s65, 0
+; VI-NEXT: s_addc_u32 s53, s53, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s23
-; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32
+; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32
; VI-NEXT: v_mov_b32_e32 v0, s4
-; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4
+; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
; VI-NEXT: v_mov_b32_e32 v0, s5
; VI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; VI-NEXT: s_mov_b64 s[0:1], s[64:65]
-; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8
+; VI-NEXT: s_mov_b64 s[0:1], s[52:53]
+; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
; VI-NEXT: s_getpc_b64 s[4:5]
; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; VI-NEXT: s_mov_b64 s[2:3], s[66:67]
+; VI-NEXT: s_mov_b64 s[2:3], s[54:55]
; VI-NEXT: v_mov_b32_e32 v0, s36
; VI-NEXT: v_mov_b32_e32 v1, s37
; VI-NEXT: v_mov_b32_e32 v2, s38
@@ -5383,29 +5383,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
;
; CI-LABEL: stack_passed_arg_alignment_v32i32_f64:
; CI: ; %bb.0: ; %entry
-; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; CI-NEXT: s_mov_b32 s66, -1
-; CI-NEXT: s_mov_b32 s67, 0xe8f000
-; CI-NEXT: s_add_u32 s64, s64, s5
+; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; CI-NEXT: s_mov_b32 s54, -1
+; CI-NEXT: s_mov_b32 s55, 0xe8f000
+; CI-NEXT: s_add_u32 s52, s52, s5
; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19
; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29
; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: s_addc_u32 s65, s65, 0
+; CI-NEXT: s_addc_u32 s53, s53, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s23
-; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32
+; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32
; CI-NEXT: v_mov_b32_e32 v0, s4
-; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4
+; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
; CI-NEXT: v_mov_b32_e32 v0, s5
; CI-NEXT: s_mov_b64 s[6:7], s[0:1]
-; CI-NEXT: s_mov_b64 s[0:1], s[64:65]
-; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8
+; CI-NEXT: s_mov_b64 s[0:1], s[52:53]
+; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
; CI-NEXT: s_getpc_b64 s[4:5]
; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; CI-NEXT: s_mov_b64 s[2:3], s[66:67]
+; CI-NEXT: s_mov_b64 s[2:3], s[54:55]
; CI-NEXT: v_mov_b32_e32 v0, s36
; CI-NEXT: v_mov_b32_e32 v1, s37
; CI-NEXT: v_mov_b32_e32 v2, s38
@@ -5442,29 +5442,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
;
; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s66, -1
-; GFX9-NEXT: s_mov_b32 s67, 0xe00000
-; GFX9-NEXT: s_add_u32 s64, s64, s5
+; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s54, -1
+; GFX9-NEXT: s_mov_b32 s55, 0xe00000
+; GFX9-NEXT: s_add_u32 s52, s52, s5
; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4
; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24
; GFX9-NEXT: s_mov_b32 s32, 0
-; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_addc_u32 s53, s53, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s23
-; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32
+; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32
; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4
; GFX9-NEXT: v_mov_b32_e32 v0, s5
; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8
+; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53]
+; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
-; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55]
; GFX9-NEXT: v_mov_b32_e32 v0, s36
; GFX9-NEXT: v_mov_b32_e32 v1, s37
; GFX9-NEXT: v_mov_b32_e32 v2, s38
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 002e82f676e8b..9561aa555c80e 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -255,24 +255,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: v_writelane_b32 v40, s36, 0
; MUBUF-NEXT: v_writelane_b32 v40, s37, 1
-; MUBUF-NEXT: v_writelane_b32 v40, s46, 2
-; MUBUF-NEXT: v_writelane_b32 v40, s47, 3
+; MUBUF-NEXT: v_writelane_b32 v40, s38, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s39, 3
; MUBUF-NEXT: v_writelane_b32 v40, s48, 4
; MUBUF-NEXT: v_writelane_b32 v40, s49, 5
; MUBUF-NEXT: v_writelane_b32 v40, s50, 6
; MUBUF-NEXT: v_writelane_b32 v40, s51, 7
; MUBUF-NEXT: v_writelane_b32 v40, s52, 8
; MUBUF-NEXT: v_writelane_b32 v40, s53, 9
-; MUBUF-NEXT: v_writelane_b32 v40, s62, 10
-; MUBUF-NEXT: v_writelane_b32 v40, s63, 11
+; MUBUF-NEXT: v_writelane_b32 v40, s54, 10
+; MUBUF-NEXT: v_writelane_b32 v40, s55, 11
; MUBUF-NEXT: v_writelane_b32 v40, s64, 12
; MUBUF-NEXT: v_writelane_b32 v40, s65, 13
; MUBUF-NEXT: v_writelane_b32 v40, s66, 14
; MUBUF-NEXT: v_writelane_b32 v40, s67, 15
; MUBUF-NEXT: v_writelane_b32 v40, s68, 16
; MUBUF-NEXT: v_writelane_b32 v40, s69, 17
-; MUBUF-NEXT: v_writelane_b32 v40, s78, 18
-; MUBUF-NEXT: v_writelane_b32 v40, s79, 19
+; MUBUF-NEXT: v_writelane_b32 v40, s70, 18
+; MUBUF-NEXT: v_writelane_b32 v40, s71, 19
; MUBUF-NEXT: v_writelane_b32 v40, s80, 20
; MUBUF-NEXT: v_writelane_b32 v40, s81, 21
; MUBUF-NEXT: v_writelane_b32 v40, s82, 22
@@ -327,24 +327,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; MUBUF-NEXT: v_readlane_b32 s82, v40, 22
; MUBUF-NEXT: v_readlane_b32 s81, v40, 21
; MUBUF-NEXT: v_readlane_b32 s80, v40, 20
-; MUBUF-NEXT: v_readlane_b32 s79, v40, 19
-; MUBUF-NEXT: v_readlane_b32 s78, v40, 18
+; MUBUF-NEXT: v_readlane_b32 s71, v40, 19
+; MUBUF-NEXT: v_readlane_b32 s70, v40, 18
; MUBUF-NEXT: v_readlane_b32 s69, v40, 17
; MUBUF-NEXT: v_readlane_b32 s68, v40, 16
; MUBUF-NEXT: v_readlane_b32 s67, v40, 15
; MUBUF-NEXT: v_readlane_b32 s66, v40, 14
; MUBUF-NEXT: v_readlane_b32 s65, v40, 13
; MUBUF-NEXT: v_readlane_b32 s64, v40, 12
-; MUBUF-NEXT: v_readlane_b32 s63, v40, 11
-; MUBUF-NEXT: v_readlane_b32 s62, v40, 10
+; MUBUF-NEXT: v_readlane_b32 s55, v40, 11
+; MUBUF-NEXT: v_readlane_b32 s54, v40, 10
; MUBUF-NEXT: v_readlane_b32 s53, v40, 9
; MUBUF-NEXT: v_readlane_b32 s52, v40, 8
; MUBUF-NEXT: v_readlane_b32 s51, v40, 7
; MUBUF-NEXT: v_readlane_b32 s50, v40, 6
; MUBUF-NEXT: v_readlane_b32 s49, v40, 5
; MUBUF-NEXT: v_readlane_b32 s48, v40, 4
-; MUBUF-NEXT: v_readlane_b32 s47, v40, 3
-; MUBUF-NEXT: v_readlane_b32 s46, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s39, v40, 3
+; MUBUF-NEXT: v_readlane_b32 s38, v40, 2
; MUBUF-NEXT: v_readlane_b32 s37, v40, 1
; MUBUF-NEXT: v_readlane_b32 s36, v40, 0
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
@@ -363,22 +363,20 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: v_writelane_b32 v40, s36, 2
; FLATSCR-NEXT: v_writelane_b32 v40, s37, 3
-; FLATSCR-NEXT: v_writelane_b32 v40, s46, 4
-; FLATSCR-NEXT: v_writelane_b32 v40, s47, 5
+; FLATSCR-NEXT: v_writelane_b32 v40, s38, 4
+; FLATSCR-NEXT: v_writelane_b32 v40, s39, 5
; FLATSCR-NEXT: v_writelane_b32 v40, s48, 6
; FLATSCR-NEXT: v_writelane_b32 v40, s49, 7
; FLATSCR-NEXT: v_writelane_b32 v40, s50, 8
; FLATSCR-NEXT: v_writelane_b32 v40, s51, 9
; FLATSCR-NEXT: v_writelane_b32 v40, s52, 10
; FLATSCR-NEXT: v_writelane_b32 v40, s53, 11
-; FLATSCR-NEXT: v_writelane_b32 v40, s62, 12
-; FLATSCR-NEXT: v_writelane_b32 v40, s63, 13
+; FLATSCR-NEXT: v_writelane_b32 v40, s54, 12
+; FLATSCR-NEXT: v_writelane_b32 v40, s55, 13
; FLATSCR-NEXT: v_writelane_b32 v40, s64, 14
; FLATSCR-NEXT: v_writelane_b32 v40, s65, 15
; FLATSCR-NEXT: v_writelane_b32 v40, s66, 16
; FLATSCR-NEXT: v_writelane_b32 v40, s67, 17
-; FLATSCR-NEXT: v_writelane_b32 v40, s68, 18
-; FLATSCR-NEXT: v_writelane_b32 v40, s69, 19
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
@@ -402,10 +400,10 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; FLATSCR-NEXT: ; def s[0:15]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; def s[68:75]
+; FLATSCR-NEXT: ; def s[72:79]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; def s[76:77]
+; FLATSCR-NEXT: ; def s[88:89]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s[52:67]
@@ -417,30 +415,28 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
; FLATSCR-NEXT: ; use s[16:31]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s[68:75]
+; FLATSCR-NEXT: ; use s[72:79]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
-; FLATSCR-NEXT: ; use s[76:77]
+; FLATSCR-NEXT: ; use s[88:89]
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s[0:15]
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s69, v40, 19
-; FLATSCR-NEXT: v_readlane_b32 s68, v40, 18
; FLATSCR-NEXT: v_readlane_b32 s67, v40, 17
; FLATSCR-NEXT: v_readlane_b32 s66, v40, 16
; FLATSCR-NEXT: v_readlane_b32 s65, v40, 15
; FLATSCR-NEXT: v_readlane_b32 s64, v40, 14
-; FLATSCR-NEXT: v_readlane_b32 s63, v40, 13
-; FLATSCR-NEXT: v_readlane_b32 s62, v40, 12
+; FLATSCR-NEXT: v_readlane_b32 s55, v40, 13
+; FLATSCR-NEXT: v_readlane_b32 s54, v40, 12
; FLATSCR-NEXT: v_readlane_b32 s53, v40, 11
; FLATSCR-NEXT: v_readlane_b32 s52, v40, 10
; FLATSCR-NEXT: v_readlane_b32 s51, v40, 9
; FLATSCR-NEXT: v_readlane_b32 s50, v40, 8
; FLATSCR-NEXT: v_readlane_b32 s49, v40, 7
; FLATSCR-NEXT: v_readlane_b32 s48, v40, 6
-; FLATSCR-NEXT: v_readlane_b32 s47, v40, 5
-; FLATSCR-NEXT: v_readlane_b32 s46, v40, 4
+; FLATSCR-NEXT: v_readlane_b32 s39, v40, 5
+; FLATSCR-NEXT: v_readlane_b32 s38, v40, 4
; FLATSCR-NEXT: v_readlane_b32 s37, v40, 3
; FLATSCR-NEXT: v_readlane_b32 s36, v40, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
@@ -541,83 +537,79 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v1, s46, 0
-; MUBUF-NEXT: v_writelane_b32 v1, s47, 1
-; MUBUF-NEXT: v_writelane_b32 v1, s48, 2
-; MUBUF-NEXT: v_writelane_b32 v1, s49, 3
-; MUBUF-NEXT: v_writelane_b32 v1, s50, 4
-; MUBUF-NEXT: v_writelane_b32 v1, s51, 5
-; MUBUF-NEXT: v_writelane_b32 v1, s52, 6
-; MUBUF-NEXT: v_writelane_b32 v1, s53, 7
-; MUBUF-NEXT: v_writelane_b32 v1, s62, 8
-; MUBUF-NEXT: v_writelane_b32 v1, s63, 9
-; MUBUF-NEXT: v_writelane_b32 v1, s64, 10
-; MUBUF-NEXT: v_writelane_b32 v1, s65, 11
-; MUBUF-NEXT: v_writelane_b32 v1, s66, 12
-; MUBUF-NEXT: v_writelane_b32 v1, s67, 13
-; MUBUF-NEXT: v_writelane_b32 v1, s68, 14
-; MUBUF-NEXT: v_writelane_b32 v1, s69, 15
-; MUBUF-NEXT: v_writelane_b32 v1, s78, 16
-; MUBUF-NEXT: v_writelane_b32 v1, s79, 17
-; MUBUF-NEXT: v_writelane_b32 v1, s80, 18
-; MUBUF-NEXT: v_writelane_b32 v1, s81, 19
-; MUBUF-NEXT: v_writelane_b32 v1, s82, 20
-; MUBUF-NEXT: v_writelane_b32 v1, s83, 21
-; MUBUF-NEXT: v_writelane_b32 v1, s84, 22
-; MUBUF-NEXT: v_writelane_b32 v1, s85, 23
-; MUBUF-NEXT: v_writelane_b32 v1, s94, 24
-; MUBUF-NEXT: v_writelane_b32 v1, s95, 25
-; MUBUF-NEXT: v_writelane_b32 v1, s96, 26
-; MUBUF-NEXT: v_writelane_b32 v1, s97, 27
-; MUBUF-NEXT: v_writelane_b32 v1, s98, 28
-; MUBUF-NEXT: v_writelane_b32 v1, s99, 29
-; MUBUF-NEXT: v_writelane_b32 v1, s100, 30
+; MUBUF-NEXT: v_writelane_b32 v1, s48, 0
+; MUBUF-NEXT: v_writelane_b32 v1, s49, 1
+; MUBUF-NEXT: v_writelane_b32 v1, s50, 2
+; MUBUF-NEXT: v_writelane_b32 v1, s51, 3
+; MUBUF-NEXT: v_writelane_b32 v1, s52, 4
+; MUBUF-NEXT: v_writelane_b32 v1, s53, 5
+; MUBUF-NEXT: v_writelane_b32 v1, s54, 6
+; MUBUF-NEXT: v_writelane_b32 v1, s55, 7
+; MUBUF-NEXT: v_writelane_b32 v1, s64, 8
+; MUBUF-NEXT: v_writelane_b32 v1, s65, 9
+; MUBUF-NEXT: v_writelane_b32 v1, s66, 10
+; MUBUF-NEXT: v_writelane_b32 v1, s67, 11
+; MUBUF-NEXT: v_writelane_b32 v1, s68, 12
+; MUBUF-NEXT: v_writelane_b32 v1, s69, 13
+; MUBUF-NEXT: v_writelane_b32 v1, s70, 14
+; MUBUF-NEXT: v_writelane_b32 v1, s71, 15
+; MUBUF-NEXT: v_writelane_b32 v1, s80, 16
+; MUBUF-NEXT: v_writelane_b32 v1, s81, 17
+; MUBUF-NEXT: v_writelane_b32 v1, s82, 18
+; MUBUF-NEXT: v_writelane_b32 v1, s83, 19
+; MUBUF-NEXT: v_writelane_b32 v1, s84, 20
+; MUBUF-NEXT: v_writelane_b32 v1, s85, 21
+; MUBUF-NEXT: v_writelane_b32 v1, s86, 22
+; MUBUF-NEXT: v_writelane_b32 v1, s87, 23
+; MUBUF-NEXT: v_writelane_b32 v1, s96, 24
+; MUBUF-NEXT: v_writelane_b32 v1, s97, 25
+; MUBUF-NEXT: v_writelane_b32 v1, s98, 26
+; MUBUF-NEXT: v_writelane_b32 v1, s99, 27
+; MUBUF-NEXT: v_writelane_b32 v1, s100, 28
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v1, s101, 31
+; MUBUF-NEXT: v_writelane_b32 v1, s101, 29
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber v41
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_writelane_b32 v1, s102, 32
+; MUBUF-NEXT: v_writelane_b32 v1, s102, 30
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_addk_i32 s32, 0x400
-; MUBUF-NEXT: v_readlane_b32 s102, v1, 32
-; MUBUF-NEXT: v_readlane_b32 s101, v1, 31
-; MUBUF-NEXT: v_readlane_b32 s100, v1, 30
-; MUBUF-NEXT: v_readlane_b32 s99, v1, 29
-; MUBUF-NEXT: v_readlane_b32 s98, v1, 28
-; MUBUF-NEXT: v_readlane_b32 s97, v1, 27
-; MUBUF-NEXT: v_readlane_b32 s96, v1, 26
-; MUBUF-NEXT: v_readlane_b32 s95, v1, 25
-; MUBUF-NEXT: v_readlane_b32 s94, v1, 24
-; MUBUF-NEXT: v_readlane_b32 s85, v1, 23
-; MUBUF-NEXT: v_readlane_b32 s84, v1, 22
-; MUBUF-NEXT: v_readlane_b32 s83, v1, 21
-; MUBUF-NEXT: v_readlane_b32 s82, v1, 20
-; MUBUF-NEXT: v_readlane_b32 s81, v1, 19
-; MUBUF-NEXT: v_readlane_b32 s80, v1, 18
-; MUBUF-NEXT: v_readlane_b32 s79, v1, 17
-; MUBUF-NEXT: v_readlane_b32 s78, v1, 16
-; MUBUF-NEXT: v_readlane_b32 s69, v1, 15
-; MUBUF-NEXT: v_readlane_b32 s68, v1, 14
-; MUBUF-NEXT: v_readlane_b32 s67, v1, 13
-; MUBUF-NEXT: v_readlane_b32 s66, v1, 12
-; MUBUF-NEXT: v_readlane_b32 s65, v1, 11
-; MUBUF-NEXT: v_readlane_b32 s64, v1, 10
-; MUBUF-NEXT: v_readlane_b32 s63, v1, 9
-; MUBUF-NEXT: v_readlane_b32 s62, v1, 8
-; MUBUF-NEXT: v_readlane_b32 s53, v1, 7
-; MUBUF-NEXT: v_readlane_b32 s52, v1, 6
-; MUBUF-NEXT: v_readlane_b32 s51, v1, 5
-; MUBUF-NEXT: v_readlane_b32 s50, v1, 4
-; MUBUF-NEXT: v_readlane_b32 s49, v1, 3
-; MUBUF-NEXT: v_readlane_b32 s48, v1, 2
-; MUBUF-NEXT: v_readlane_b32 s47, v1, 1
-; MUBUF-NEXT: v_readlane_b32 s46, v1, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v1, 30
+; MUBUF-NEXT: v_readlane_b32 s101, v1, 29
+; MUBUF-NEXT: v_readlane_b32 s100, v1, 28
+; MUBUF-NEXT: v_readlane_b32 s99, v1, 27
+; MUBUF-NEXT: v_readlane_b32 s98, v1, 26
+; MUBUF-NEXT: v_readlane_b32 s97, v1, 25
+; MUBUF-NEXT: v_readlane_b32 s96, v1, 24
+; MUBUF-NEXT: v_readlane_b32 s87, v1, 23
+; MUBUF-NEXT: v_readlane_b32 s86, v1, 22
+; MUBUF-NEXT: v_readlane_b32 s85, v1, 21
+; MUBUF-NEXT: v_readlane_b32 s84, v1, 20
+; MUBUF-NEXT: v_readlane_b32 s83, v1, 19
+; MUBUF-NEXT: v_readlane_b32 s82, v1, 18
+; MUBUF-NEXT: v_readlane_b32 s81, v1, 17
+; MUBUF-NEXT: v_readlane_b32 s80, v1, 16
+; MUBUF-NEXT: v_readlane_b32 s71, v1, 15
+; MUBUF-NEXT: v_readlane_b32 s70, v1, 14
+; MUBUF-NEXT: v_readlane_b32 s69, v1, 13
+; MUBUF-NEXT: v_readlane_b32 s68, v1, 12
+; MUBUF-NEXT: v_readlane_b32 s67, v1, 11
+; MUBUF-NEXT: v_readlane_b32 s66, v1, 10
+; MUBUF-NEXT: v_readlane_b32 s65, v1, 9
+; MUBUF-NEXT: v_readlane_b32 s64, v1, 8
+; MUBUF-NEXT: v_readlane_b32 s55, v1, 7
+; MUBUF-NEXT: v_readlane_b32 s54, v1, 6
+; MUBUF-NEXT: v_readlane_b32 s53, v1, 5
+; MUBUF-NEXT: v_readlane_b32 s52, v1, 4
+; MUBUF-NEXT: v_readlane_b32 s51, v1, 3
+; MUBUF-NEXT: v_readlane_b32 s50, v1, 2
+; MUBUF-NEXT: v_readlane_b32 s49, v1, 1
+; MUBUF-NEXT: v_readlane_b32 s48, v1, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
@@ -634,83 +626,79 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0
-; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1
-; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2
-; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3
-; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4
-; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5
-; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6
-; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7
-; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8
-; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9
-; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10
-; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11
-; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12
-; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13
-; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14
-; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15
-; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16
-; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17
-; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18
-; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19
-; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20
-; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21
-; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22
-; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23
-; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24
-; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25
-; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26
-; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27
-; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28
-; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29
-; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30
+; FLATSCR-NEXT: v_writelane_b32 v1, s48, 0
+; FLATSCR-NEXT: v_writelane_b32 v1, s49, 1
+; FLATSCR-NEXT: v_writelane_b32 v1, s50, 2
+; FLATSCR-NEXT: v_writelane_b32 v1, s51, 3
+; FLATSCR-NEXT: v_writelane_b32 v1, s52, 4
+; FLATSCR-NEXT: v_writelane_b32 v1, s53, 5
+; FLATSCR-NEXT: v_writelane_b32 v1, s54, 6
+; FLATSCR-NEXT: v_writelane_b32 v1, s55, 7
+; FLATSCR-NEXT: v_writelane_b32 v1, s64, 8
+; FLATSCR-NEXT: v_writelane_b32 v1, s65, 9
+; FLATSCR-NEXT: v_writelane_b32 v1, s66, 10
+; FLATSCR-NEXT: v_writelane_b32 v1, s67, 11
+; FLATSCR-NEXT: v_writelane_b32 v1, s68, 12
+; FLATSCR-NEXT: v_writelane_b32 v1, s69, 13
+; FLATSCR-NEXT: v_writelane_b32 v1, s70, 14
+; FLATSCR-NEXT: v_writelane_b32 v1, s71, 15
+; FLATSCR-NEXT: v_writelane_b32 v1, s80, 16
+; FLATSCR-NEXT: v_writelane_b32 v1, s81, 17
+; FLATSCR-NEXT: v_writelane_b32 v1, s82, 18
+; FLATSCR-NEXT: v_writelane_b32 v1, s83, 19
+; FLATSCR-NEXT: v_writelane_b32 v1, s84, 20
+; FLATSCR-NEXT: v_writelane_b32 v1, s85, 21
+; FLATSCR-NEXT: v_writelane_b32 v1, s86, 22
+; FLATSCR-NEXT: v_writelane_b32 v1, s87, 23
+; FLATSCR-NEXT: v_writelane_b32 v1, s96, 24
+; FLATSCR-NEXT: v_writelane_b32 v1, s97, 25
+; FLATSCR-NEXT: v_writelane_b32 v1, s98, 26
+; FLATSCR-NEXT: v_writelane_b32 v1, s99, 27
+; FLATSCR-NEXT: v_writelane_b32 v1, s100, 28
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31
+; FLATSCR-NEXT: v_writelane_b32 v1, s101, 29
; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber v41
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32
+; FLATSCR-NEXT: v_writelane_b32 v1, s102, 30
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
-; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32
-; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31
-; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30
-; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29
-; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28
-; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27
-; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26
-; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25
-; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24
-; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23
-; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22
-; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21
-; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20
-; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19
-; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18
-; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17
-; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16
-; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15
-; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14
-; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13
-; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12
-; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11
-; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10
-; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9
-; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8
-; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7
-; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6
-; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5
-; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4
-; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3
-; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2
-; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1
-; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v1, 30
+; FLATSCR-NEXT: v_readlane_b32 s101, v1, 29
+; FLATSCR-NEXT: v_readlane_b32 s100, v1, 28
+; FLATSCR-NEXT: v_readlane_b32 s99, v1, 27
+; FLATSCR-NEXT: v_readlane_b32 s98, v1, 26
+; FLATSCR-NEXT: v_readlane_b32 s97, v1, 25
+; FLATSCR-NEXT: v_readlane_b32 s96, v1, 24
+; FLATSCR-NEXT: v_readlane_b32 s87, v1, 23
+; FLATSCR-NEXT: v_readlane_b32 s86, v1, 22
+; FLATSCR-NEXT: v_readlane_b32 s85, v1, 21
+; FLATSCR-NEXT: v_readlane_b32 s84, v1, 20
+; FLATSCR-NEXT: v_readlane_b32 s83, v1, 19
+; FLATSCR-NEXT: v_readlane_b32 s82, v1, 18
+; FLATSCR-NEXT: v_readlane_b32 s81, v1, 17
+; FLATSCR-NEXT: v_readlane_b32 s80, v1, 16
+; FLATSCR-NEXT: v_readlane_b32 s71, v1, 15
+; FLATSCR-NEXT: v_readlane_b32 s70, v1, 14
+; FLATSCR-NEXT: v_readlane_b32 s69, v1, 13
+; FLATSCR-NEXT: v_readlane_b32 s68, v1, 12
+; FLATSCR-NEXT: v_readlane_b32 s67, v1, 11
+; FLATSCR-NEXT: v_readlane_b32 s66, v1, 10
+; FLATSCR-NEXT: v_readlane_b32 s65, v1, 9
+; FLATSCR-NEXT: v_readlane_b32 s64, v1, 8
+; FLATSCR-NEXT: v_readlane_b32 s55, v1, 7
+; FLATSCR-NEXT: v_readlane_b32 s54, v1, 6
+; FLATSCR-NEXT: v_readlane_b32 s53, v1, 5
+; FLATSCR-NEXT: v_readlane_b32 s52, v1, 4
+; FLATSCR-NEXT: v_readlane_b32 s51, v1, 3
+; FLATSCR-NEXT: v_readlane_b32 s50, v1, 2
+; FLATSCR-NEXT: v_readlane_b32 s49, v1, 1
+; FLATSCR-NEXT: v_readlane_b32 s48, v1, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload
@@ -743,83 +731,81 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
-; MUBUF-NEXT: v_writelane_b32 v1, s46, 0
-; MUBUF-NEXT: v_writelane_b32 v1, s47, 1
-; MUBUF-NEXT: v_writelane_b32 v1, s48, 2
-; MUBUF-NEXT: v_writelane_b32 v1, s49, 3
-; MUBUF-NEXT: v_writelane_b32 v1, s50, 4
-; MUBUF-NEXT: v_writelane_b32 v1, s51, 5
-; MUBUF-NEXT: v_writelane_b32 v1, s52, 6
-; MUBUF-NEXT: v_writelane_b32 v1, s53, 7
-; MUBUF-NEXT: v_writelane_b32 v1, s62, 8
-; MUBUF-NEXT: v_writelane_b32 v1, s63, 9
-; MUBUF-NEXT: v_writelane_b32 v1, s64, 10
-; MUBUF-NEXT: v_writelane_b32 v1, s65, 11
-; MUBUF-NEXT: v_writelane_b32 v1, s66, 12
-; MUBUF-NEXT: v_writelane_b32 v1, s67, 13
-; MUBUF-NEXT: v_writelane_b32 v1, s68, 14
-; MUBUF-NEXT: v_writelane_b32 v1, s69, 15
-; MUBUF-NEXT: v_writelane_b32 v1, s78, 16
-; MUBUF-NEXT: v_writelane_b32 v1, s79, 17
-; MUBUF-NEXT: v_writelane_b32 v1, s80, 18
-; MUBUF-NEXT: v_writelane_b32 v1, s81, 19
-; MUBUF-NEXT: v_writelane_b32 v1, s82, 20
-; MUBUF-NEXT: v_writelane_b32 v1, s83, 21
-; MUBUF-NEXT: v_writelane_b32 v1, s84, 22
-; MUBUF-NEXT: v_writelane_b32 v1, s85, 23
-; MUBUF-NEXT: v_writelane_b32 v1, s94, 24
-; MUBUF-NEXT: v_writelane_b32 v1, s95, 25
-; MUBUF-NEXT: v_writelane_b32 v1, s96, 26
-; MUBUF-NEXT: v_writelane_b32 v1, s97, 27
-; MUBUF-NEXT: v_writelane_b32 v1, s98, 28
-; MUBUF-NEXT: v_writelane_b32 v1, s99, 29
-; MUBUF-NEXT: v_writelane_b32 v1, s100, 30
+; MUBUF-NEXT: v_writelane_b32 v1, s39, 0
+; MUBUF-NEXT: v_writelane_b32 v1, s48, 1
+; MUBUF-NEXT: v_writelane_b32 v1, s49, 2
+; MUBUF-NEXT: v_writelane_b32 v1, s50, 3
+; MUBUF-NEXT: v_writelane_b32 v1, s51, 4
+; MUBUF-NEXT: v_writelane_b32 v1, s52, 5
+; MUBUF-NEXT: v_writelane_b32 v1, s53, 6
+; MUBUF-NEXT: v_writelane_b32 v1, s54, 7
+; MUBUF-NEXT: v_writelane_b32 v1, s55, 8
+; MUBUF-NEXT: v_writelane_b32 v1, s64, 9
+; MUBUF-NEXT: v_writelane_b32 v1, s65, 10
+; MUBUF-NEXT: v_writelane_b32 v1, s66, 11
+; MUBUF-NEXT: v_writelane_b32 v1, s67, 12
+; MUBUF-NEXT: v_writelane_b32 v1, s68, 13
+; MUBUF-NEXT: v_writelane_b32 v1, s69, 14
+; MUBUF-NEXT: v_writelane_b32 v1, s70, 15
+; MUBUF-NEXT: v_writelane_b32 v1, s71, 16
+; MUBUF-NEXT: v_writelane_b32 v1, s80, 17
+; MUBUF-NEXT: v_writelane_b32 v1, s81, 18
+; MUBUF-NEXT: v_writelane_b32 v1, s82, 19
+; MUBUF-NEXT: v_writelane_b32 v1, s83, 20
+; MUBUF-NEXT: v_writelane_b32 v1, s84, 21
+; MUBUF-NEXT: v_writelane_b32 v1, s85, 22
+; MUBUF-NEXT: v_writelane_b32 v1, s86, 23
+; MUBUF-NEXT: v_writelane_b32 v1, s87, 24
+; MUBUF-NEXT: v_writelane_b32 v1, s96, 25
+; MUBUF-NEXT: v_writelane_b32 v1, s97, 26
+; MUBUF-NEXT: v_writelane_b32 v1, s98, 27
+; MUBUF-NEXT: v_writelane_b32 v1, s99, 28
+; MUBUF-NEXT: v_writelane_b32 v1, s100, 29
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: v_writelane_b32 v1, s101, 31
+; MUBUF-NEXT: v_writelane_b32 v1, s101, 30
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber v41
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_writelane_b32 v1, s102, 32
+; MUBUF-NEXT: v_writelane_b32 v1, s102, 31
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_addk_i32 s32, 0x400
-; MUBUF-NEXT: v_readlane_b32 s102, v1, 32
-; MUBUF-NEXT: v_readlane_b32 s101, v1, 31
-; MUBUF-NEXT: v_readlane_b32 s100, v1, 30
-; MUBUF-NEXT: v_readlane_b32 s99, v1, 29
-; MUBUF-NEXT: v_readlane_b32 s98, v1, 28
-; MUBUF-NEXT: v_readlane_b32 s97, v1, 27
-; MUBUF-NEXT: v_readlane_b32 s96, v1, 26
-; MUBUF-NEXT: v_readlane_b32 s95, v1, 25
-; MUBUF-NEXT: v_readlane_b32 s94, v1, 24
-; MUBUF-NEXT: v_readlane_b32 s85, v1, 23
-; MUBUF-NEXT: v_readlane_b32 s84, v1, 22
-; MUBUF-NEXT: v_readlane_b32 s83, v1, 21
-; MUBUF-NEXT: v_readlane_b32 s82, v1, 20
-; MUBUF-NEXT: v_readlane_b32 s81, v1, 19
-; MUBUF-NEXT: v_readlane_b32 s80, v1, 18
-; MUBUF-NEXT: v_readlane_b32 s79, v1, 17
-; MUBUF-NEXT: v_readlane_b32 s78, v1, 16
-; MUBUF-NEXT: v_readlane_b32 s69, v1, 15
-; MUBUF-NEXT: v_readlane_b32 s68, v1, 14
-; MUBUF-NEXT: v_readlane_b32 s67, v1, 13
-; MUBUF-NEXT: v_readlane_b32 s66, v1, 12
-; MUBUF-NEXT: v_readlane_b32 s65, v1, 11
-; MUBUF-NEXT: v_readlane_b32 s64, v1, 10
-; MUBUF-NEXT: v_readlane_b32 s63, v1, 9
-; MUBUF-NEXT: v_readlane_b32 s62, v1, 8
-; MUBUF-NEXT: v_readlane_b32 s53, v1, 7
-; MUBUF-NEXT: v_readlane_b32 s52, v1, 6
-; MUBUF-NEXT: v_readlane_b32 s51, v1, 5
-; MUBUF-NEXT: v_readlane_b32 s50, v1, 4
-; MUBUF-NEXT: v_readlane_b32 s49, v1, 3
-; MUBUF-NEXT: v_readlane_b32 s48, v1, 2
-; MUBUF-NEXT: v_readlane_b32 s47, v1, 1
-; MUBUF-NEXT: v_readlane_b32 s46, v1, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v1, 31
+; MUBUF-NEXT: v_readlane_b32 s101, v1, 30
+; MUBUF-NEXT: v_readlane_b32 s100, v1, 29
+; MUBUF-NEXT: v_readlane_b32 s99, v1, 28
+; MUBUF-NEXT: v_readlane_b32 s98, v1, 27
+; MUBUF-NEXT: v_readlane_b32 s97, v1, 26
+; MUBUF-NEXT: v_readlane_b32 s96, v1, 25
+; MUBUF-NEXT: v_readlane_b32 s87, v1, 24
+; MUBUF-NEXT: v_readlane_b32 s86, v1, 23
+; MUBUF-NEXT: v_readlane_b32 s85, v1, 22
+; MUBUF-NEXT: v_readlane_b32 s84, v1, 21
+; MUBUF-NEXT: v_readlane_b32 s83, v1, 20
+; MUBUF-NEXT: v_readlane_b32 s82, v1, 19
+; MUBUF-NEXT: v_readlane_b32 s81, v1, 18
+; MUBUF-NEXT: v_readlane_b32 s80, v1, 17
+; MUBUF-NEXT: v_readlane_b32 s71, v1, 16
+; MUBUF-NEXT: v_readlane_b32 s70, v1, 15
+; MUBUF-NEXT: v_readlane_b32 s69, v1, 14
+; MUBUF-NEXT: v_readlane_b32 s68, v1, 13
+; MUBUF-NEXT: v_readlane_b32 s67, v1, 12
+; MUBUF-NEXT: v_readlane_b32 s66, v1, 11
+; MUBUF-NEXT: v_readlane_b32 s65, v1, 10
+; MUBUF-NEXT: v_readlane_b32 s64, v1, 9
+; MUBUF-NEXT: v_readlane_b32 s55, v1, 8
+; MUBUF-NEXT: v_readlane_b32 s54, v1, 7
+; MUBUF-NEXT: v_readlane_b32 s53, v1, 6
+; MUBUF-NEXT: v_readlane_b32 s52, v1, 5
+; MUBUF-NEXT: v_readlane_b32 s51, v1, 4
+; MUBUF-NEXT: v_readlane_b32 s50, v1, 3
+; MUBUF-NEXT: v_readlane_b32 s49, v1, 2
+; MUBUF-NEXT: v_readlane_b32 s48, v1, 1
+; MUBUF-NEXT: v_readlane_b32 s39, v1, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
@@ -836,83 +822,81 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0
-; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1
-; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2
-; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3
-; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4
-; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5
-; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6
-; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7
-; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8
-; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9
-; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10
-; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11
-; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12
-; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13
-; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14
-; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15
-; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16
-; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17
-; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18
-; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19
-; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20
-; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21
-; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22
-; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23
-; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24
-; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25
-; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26
-; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27
-; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28
-; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29
-; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30
+; FLATSCR-NEXT: v_writelane_b32 v1, s39, 0
+; FLATSCR-NEXT: v_writelane_b32 v1, s48, 1
+; FLATSCR-NEXT: v_writelane_b32 v1, s49, 2
+; FLATSCR-NEXT: v_writelane_b32 v1, s50, 3
+; FLATSCR-NEXT: v_writelane_b32 v1, s51, 4
+; FLATSCR-NEXT: v_writelane_b32 v1, s52, 5
+; FLATSCR-NEXT: v_writelane_b32 v1, s53, 6
+; FLATSCR-NEXT: v_writelane_b32 v1, s54, 7
+; FLATSCR-NEXT: v_writelane_b32 v1, s55, 8
+; FLATSCR-NEXT: v_writelane_b32 v1, s64, 9
+; FLATSCR-NEXT: v_writelane_b32 v1, s65, 10
+; FLATSCR-NEXT: v_writelane_b32 v1, s66, 11
+; FLATSCR-NEXT: v_writelane_b32 v1, s67, 12
+; FLATSCR-NEXT: v_writelane_b32 v1, s68, 13
+; FLATSCR-NEXT: v_writelane_b32 v1, s69, 14
+; FLATSCR-NEXT: v_writelane_b32 v1, s70, 15
+; FLATSCR-NEXT: v_writelane_b32 v1, s71, 16
+; FLATSCR-NEXT: v_writelane_b32 v1, s80, 17
+; FLATSCR-NEXT: v_writelane_b32 v1, s81, 18
+; FLATSCR-NEXT: v_writelane_b32 v1, s82, 19
+; FLATSCR-NEXT: v_writelane_b32 v1, s83, 20
+; FLATSCR-NEXT: v_writelane_b32 v1, s84, 21
+; FLATSCR-NEXT: v_writelane_b32 v1, s85, 22
+; FLATSCR-NEXT: v_writelane_b32 v1, s86, 23
+; FLATSCR-NEXT: v_writelane_b32 v1, s87, 24
+; FLATSCR-NEXT: v_writelane_b32 v1, s96, 25
+; FLATSCR-NEXT: v_writelane_b32 v1, s97, 26
+; FLATSCR-NEXT: v_writelane_b32 v1, s98, 27
+; FLATSCR-NEXT: v_writelane_b32 v1, s99, 28
+; FLATSCR-NEXT: v_writelane_b32 v1, s100, 29
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
-; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31
+; FLATSCR-NEXT: v_writelane_b32 v1, s101, 30
; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber v41
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32
+; FLATSCR-NEXT: v_writelane_b32 v1, s102, 31
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
-; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32
-; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31
-; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30
-; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29
-; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28
-; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27
-; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26
-; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25
-; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24
-; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23
-; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22
-; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21
-; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20
-; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19
-; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18
-; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17
-; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16
-; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15
-; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14
-; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13
-; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12
-; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11
-; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10
-; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9
-; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8
-; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7
-; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6
-; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5
-; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4
-; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3
-; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2
-; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1
-; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v1, 31
+; FLATSCR-NEXT: v_readlane_b32 s101, v1, 30
+; FLATSCR-NEXT: v_readlane_b32 s100, v1, 29
+; FLATSCR-NEXT: v_readlane_b32 s99, v1, 28
+; FLATSCR-NEXT: v_readlane_b32 s98, v1, 27
+; FLATSCR-NEXT: v_readlane_b32 s97, v1, 26
+; FLATSCR-NEXT: v_readlane_b32 s96, v1, 25
+; FLATSCR-NEXT: v_readlane_b32 s87, v1, 24
+; FLATSCR-NEXT: v_readlane_b32 s86, v1, 23
+; FLATSCR-NEXT: v_readlane_b32 s85, v1, 22
+; FLATSCR-NEXT: v_readlane_b32 s84, v1, 21
+; FLATSCR-NEXT: v_readlane_b32 s83, v1, 20
+; FLATSCR-NEXT: v_readlane_b32 s82, v1, 19
+; FLATSCR-NEXT: v_readlane_b32 s81, v1, 18
+; FLATSCR-NEXT: v_readlane_b32 s80, v1, 17
+; FLATSCR-NEXT: v_readlane_b32 s71, v1, 16
+; FLATSCR-NEXT: v_readlane_b32 s70, v1, 15
+; FLATSCR-NEXT: v_readlane_b32 s69, v1, 14
+; FLATSCR-NEXT: v_readlane_b32 s68, v1, 13
+; FLATSCR-NEXT: v_readlane_b32 s67, v1, 12
+; FLATSCR-NEXT: v_readlane_b32 s66, v1, 11
+; FLATSCR-NEXT: v_readlane_b32 s65, v1, 10
+; FLATSCR-NEXT: v_readlane_b32 s64, v1, 9
+; FLATSCR-NEXT: v_readlane_b32 s55, v1, 8
+; FLATSCR-NEXT: v_readlane_b32 s54, v1, 7
+; FLATSCR-NEXT: v_readlane_b32 s53, v1, 6
+; FLATSCR-NEXT: v_readlane_b32 s52, v1, 5
+; FLATSCR-NEXT: v_readlane_b32 s51, v1, 4
+; FLATSCR-NEXT: v_readlane_b32 s50, v1, 3
+; FLATSCR-NEXT: v_readlane_b32 s49, v1, 2
+; FLATSCR-NEXT: v_readlane_b32 s48, v1, 1
+; FLATSCR-NEXT: v_readlane_b32 s39, v1, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload
@@ -980,7 +964,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s40, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -999,14 +983,14 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: s_mov_b32 s33, s40
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_mov_b32 s38, s33
+; FLATSCR-NEXT: s_mov_b32 s40, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill
@@ -1025,7 +1009,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_mov_b32 s33, s38
+; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@@ -1046,7 +1030,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s40, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -1068,14 +1052,14 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: s_mov_b32 s33, s40
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_mov_b32 s38, s33
+; FLATSCR-NEXT: s_mov_b32 s40, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill
@@ -1097,7 +1081,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_mov_b32 s33, s38
+; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@@ -1125,7 +1109,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; MUBUF-LABEL: scratch_reg_needed_mubuf_offset:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s40, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
@@ -1151,14 +1135,14 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s6 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: s_mov_b32 s33, s40
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: scratch_reg_needed_mubuf_offset:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; FLATSCR-NEXT: s_mov_b32 s38, s33
+; FLATSCR-NEXT: s_mov_b32 s40, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004
@@ -1184,7 +1168,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8])
; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004
; FLATSCR-NEXT: scratch_load_dword v40, off, s2 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
-; FLATSCR-NEXT: s_mov_b32 s33, s38
+; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, addrspace(5)
@@ -1284,7 +1268,7 @@ define void @callee_need_to_spill_fp_to_memory() #3 {
; MUBUF-LABEL: callee_need_to_spill_fp_to_memory:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s40, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber nonpreserved SGPRs
@@ -1292,7 +1276,7 @@ define void @callee_need_to_spill_fp_to_memory() #3 {
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: s_mov_b32 s33, s40
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: callee_need_to_spill_fp_to_memory:
@@ -1329,89 +1313,89 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
; MUBUF-LABEL: callee_need_to_spill_fp_to_memory_full_reserved_vgpr:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: v_writelane_b32 v39, s46, 0
-; MUBUF-NEXT: v_writelane_b32 v39, s47, 1
-; MUBUF-NEXT: v_writelane_b32 v39, s48, 2
-; MUBUF-NEXT: v_writelane_b32 v39, s49, 3
-; MUBUF-NEXT: v_writelane_b32 v39, s50, 4
-; MUBUF-NEXT: v_writelane_b32 v39, s51, 5
-; MUBUF-NEXT: v_writelane_b32 v39, s52, 6
-; MUBUF-NEXT: v_writelane_b32 v39, s53, 7
-; MUBUF-NEXT: v_writelane_b32 v39, s62, 8
-; MUBUF-NEXT: v_writelane_b32 v39, s63, 9
-; MUBUF-NEXT: v_writelane_b32 v39, s64, 10
-; MUBUF-NEXT: v_writelane_b32 v39, s65, 11
-; MUBUF-NEXT: v_writelane_b32 v39, s66, 12
-; MUBUF-NEXT: v_writelane_b32 v39, s67, 13
-; MUBUF-NEXT: v_writelane_b32 v39, s68, 14
-; MUBUF-NEXT: v_writelane_b32 v39, s69, 15
-; MUBUF-NEXT: v_writelane_b32 v39, s78, 16
-; MUBUF-NEXT: v_writelane_b32 v39, s79, 17
-; MUBUF-NEXT: v_writelane_b32 v39, s80, 18
-; MUBUF-NEXT: v_writelane_b32 v39, s81, 19
-; MUBUF-NEXT: v_writelane_b32 v39, s82, 20
-; MUBUF-NEXT: v_writelane_b32 v39, s83, 21
-; MUBUF-NEXT: v_writelane_b32 v39, s84, 22
-; MUBUF-NEXT: v_writelane_b32 v39, s85, 23
-; MUBUF-NEXT: v_writelane_b32 v39, s94, 24
-; MUBUF-NEXT: v_writelane_b32 v39, s95, 25
-; MUBUF-NEXT: v_writelane_b32 v39, s96, 26
-; MUBUF-NEXT: v_writelane_b32 v39, s97, 27
-; MUBUF-NEXT: v_writelane_b32 v39, s98, 28
-; MUBUF-NEXT: v_writelane_b32 v39, s99, 29
-; MUBUF-NEXT: v_writelane_b32 v39, s100, 30
-; MUBUF-NEXT: v_writelane_b32 v39, s101, 31
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: v_writelane_b32 v39, s4, 32
+; MUBUF-NEXT: v_writelane_b32 v39, s39, 0
+; MUBUF-NEXT: v_writelane_b32 v39, s48, 1
+; MUBUF-NEXT: v_writelane_b32 v39, s49, 2
+; MUBUF-NEXT: v_writelane_b32 v39, s50, 3
+; MUBUF-NEXT: v_writelane_b32 v39, s51, 4
+; MUBUF-NEXT: v_writelane_b32 v39, s52, 5
+; MUBUF-NEXT: v_writelane_b32 v39, s53, 6
+; MUBUF-NEXT: v_writelane_b32 v39, s54, 7
+; MUBUF-NEXT: v_writelane_b32 v39, s55, 8
+; MUBUF-NEXT: v_writelane_b32 v39, s64, 9
+; MUBUF-NEXT: v_writelane_b32 v39, s65, 10
+; MUBUF-NEXT: v_writelane_b32 v39, s66, 11
+; MUBUF-NEXT: v_writelane_b32 v39, s67, 12
+; MUBUF-NEXT: v_writelane_b32 v39, s68, 13
+; MUBUF-NEXT: v_writelane_b32 v39, s69, 14
+; MUBUF-NEXT: v_writelane_b32 v39, s70, 15
+; MUBUF-NEXT: v_writelane_b32 v39, s71, 16
+; MUBUF-NEXT: v_writelane_b32 v39, s80, 17
+; MUBUF-NEXT: v_writelane_b32 v39, s81, 18
+; MUBUF-NEXT: v_writelane_b32 v39, s82, 19
+; MUBUF-NEXT: v_writelane_b32 v39, s83, 20
+; MUBUF-NEXT: v_writelane_b32 v39, s84, 21
+; MUBUF-NEXT: v_writelane_b32 v39, s85, 22
+; MUBUF-NEXT: v_writelane_b32 v39, s86, 23
+; MUBUF-NEXT: v_writelane_b32 v39, s87, 24
+; MUBUF-NEXT: v_writelane_b32 v39, s96, 25
+; MUBUF-NEXT: v_writelane_b32 v39, s97, 26
+; MUBUF-NEXT: v_writelane_b32 v39, s98, 27
+; MUBUF-NEXT: v_writelane_b32 v39, s99, 28
+; MUBUF-NEXT: v_writelane_b32 v39, s100, 29
+; MUBUF-NEXT: v_writelane_b32 v39, s101, 30
; MUBUF-NEXT: s_addk_i32 s32, 0x200
-; MUBUF-NEXT: v_writelane_b32 v39, s102, 32
+; MUBUF-NEXT: v_writelane_b32 v39, s102, 31
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs except CSR v40
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s102, v39, 32
-; MUBUF-NEXT: v_readlane_b32 s101, v39, 31
-; MUBUF-NEXT: v_readlane_b32 s100, v39, 30
-; MUBUF-NEXT: v_readlane_b32 s99, v39, 29
-; MUBUF-NEXT: v_readlane_b32 s98, v39, 28
-; MUBUF-NEXT: v_readlane_b32 s97, v39, 27
-; MUBUF-NEXT: v_readlane_b32 s96, v39, 26
-; MUBUF-NEXT: v_readlane_b32 s95, v39, 25
-; MUBUF-NEXT: v_readlane_b32 s94, v39, 24
-; MUBUF-NEXT: v_readlane_b32 s85, v39, 23
-; MUBUF-NEXT: v_readlane_b32 s84, v39, 22
-; MUBUF-NEXT: v_readlane_b32 s83, v39, 21
-; MUBUF-NEXT: v_readlane_b32 s82, v39, 20
-; MUBUF-NEXT: v_readlane_b32 s81, v39, 19
-; MUBUF-NEXT: v_readlane_b32 s80, v39, 18
-; MUBUF-NEXT: v_readlane_b32 s79, v39, 17
-; MUBUF-NEXT: v_readlane_b32 s78, v39, 16
-; MUBUF-NEXT: v_readlane_b32 s69, v39, 15
-; MUBUF-NEXT: v_readlane_b32 s68, v39, 14
-; MUBUF-NEXT: v_readlane_b32 s67, v39, 13
-; MUBUF-NEXT: v_readlane_b32 s66, v39, 12
-; MUBUF-NEXT: v_readlane_b32 s65, v39, 11
-; MUBUF-NEXT: v_readlane_b32 s64, v39, 10
-; MUBUF-NEXT: v_readlane_b32 s63, v39, 9
-; MUBUF-NEXT: v_readlane_b32 s62, v39, 8
-; MUBUF-NEXT: v_readlane_b32 s53, v39, 7
-; MUBUF-NEXT: v_readlane_b32 s52, v39, 6
-; MUBUF-NEXT: v_readlane_b32 s51, v39, 5
-; MUBUF-NEXT: v_readlane_b32 s50, v39, 4
-; MUBUF-NEXT: v_readlane_b32 s49, v39, 3
-; MUBUF-NEXT: v_readlane_b32 s48, v39, 2
-; MUBUF-NEXT: v_readlane_b32 s47, v39, 1
-; MUBUF-NEXT: v_readlane_b32 s46, v39, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v39, 31
+; MUBUF-NEXT: v_readlane_b32 s101, v39, 30
+; MUBUF-NEXT: v_readlane_b32 s100, v39, 29
+; MUBUF-NEXT: v_readlane_b32 s99, v39, 28
+; MUBUF-NEXT: v_readlane_b32 s98, v39, 27
+; MUBUF-NEXT: v_readlane_b32 s97, v39, 26
+; MUBUF-NEXT: v_readlane_b32 s96, v39, 25
+; MUBUF-NEXT: v_readlane_b32 s87, v39, 24
+; MUBUF-NEXT: v_readlane_b32 s86, v39, 23
+; MUBUF-NEXT: v_readlane_b32 s85, v39, 22
+; MUBUF-NEXT: v_readlane_b32 s84, v39, 21
+; MUBUF-NEXT: v_readlane_b32 s83, v39, 20
+; MUBUF-NEXT: v_readlane_b32 s82, v39, 19
+; MUBUF-NEXT: v_readlane_b32 s81, v39, 18
+; MUBUF-NEXT: v_readlane_b32 s80, v39, 17
+; MUBUF-NEXT: v_readlane_b32 s71, v39, 16
+; MUBUF-NEXT: v_readlane_b32 s70, v39, 15
+; MUBUF-NEXT: v_readlane_b32 s69, v39, 14
+; MUBUF-NEXT: v_readlane_b32 s68, v39, 13
+; MUBUF-NEXT: v_readlane_b32 s67, v39, 12
+; MUBUF-NEXT: v_readlane_b32 s66, v39, 11
+; MUBUF-NEXT: v_readlane_b32 s65, v39, 10
+; MUBUF-NEXT: v_readlane_b32 s64, v39, 9
+; MUBUF-NEXT: v_readlane_b32 s55, v39, 8
+; MUBUF-NEXT: v_readlane_b32 s54, v39, 7
+; MUBUF-NEXT: v_readlane_b32 s53, v39, 6
+; MUBUF-NEXT: v_readlane_b32 s52, v39, 5
+; MUBUF-NEXT: v_readlane_b32 s51, v39, 4
+; MUBUF-NEXT: v_readlane_b32 s50, v39, 3
+; MUBUF-NEXT: v_readlane_b32 s49, v39, 2
+; MUBUF-NEXT: v_readlane_b32 s48, v39, 1
+; MUBUF-NEXT: v_readlane_b32 s39, v39, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: v_readlane_b32 s4, v39, 32
+; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@@ -1423,79 +1407,77 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v39, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0
-; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1
-; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2
-; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3
-; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4
-; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5
-; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6
-; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7
-; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8
-; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9
-; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10
-; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11
-; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12
-; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13
-; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14
-; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15
-; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16
-; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17
-; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18
-; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19
-; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20
-; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21
-; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22
-; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23
-; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24
-; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25
-; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26
-; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27
-; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28
-; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29
-; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30
-; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31
+; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0
+; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1
+; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2
+; FLATSCR-NEXT: v_writelane_b32 v39, s50, 3
+; FLATSCR-NEXT: v_writelane_b32 v39, s51, 4
+; FLATSCR-NEXT: v_writelane_b32 v39, s52, 5
+; FLATSCR-NEXT: v_writelane_b32 v39, s53, 6
+; FLATSCR-NEXT: v_writelane_b32 v39, s54, 7
+; FLATSCR-NEXT: v_writelane_b32 v39, s55, 8
+; FLATSCR-NEXT: v_writelane_b32 v39, s64, 9
+; FLATSCR-NEXT: v_writelane_b32 v39, s65, 10
+; FLATSCR-NEXT: v_writelane_b32 v39, s66, 11
+; FLATSCR-NEXT: v_writelane_b32 v39, s67, 12
+; FLATSCR-NEXT: v_writelane_b32 v39, s68, 13
+; FLATSCR-NEXT: v_writelane_b32 v39, s69, 14
+; FLATSCR-NEXT: v_writelane_b32 v39, s70, 15
+; FLATSCR-NEXT: v_writelane_b32 v39, s71, 16
+; FLATSCR-NEXT: v_writelane_b32 v39, s80, 17
+; FLATSCR-NEXT: v_writelane_b32 v39, s81, 18
+; FLATSCR-NEXT: v_writelane_b32 v39, s82, 19
+; FLATSCR-NEXT: v_writelane_b32 v39, s83, 20
+; FLATSCR-NEXT: v_writelane_b32 v39, s84, 21
+; FLATSCR-NEXT: v_writelane_b32 v39, s85, 22
+; FLATSCR-NEXT: v_writelane_b32 v39, s86, 23
+; FLATSCR-NEXT: v_writelane_b32 v39, s87, 24
+; FLATSCR-NEXT: v_writelane_b32 v39, s96, 25
+; FLATSCR-NEXT: v_writelane_b32 v39, s97, 26
+; FLATSCR-NEXT: v_writelane_b32 v39, s98, 27
+; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28
+; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29
+; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
-; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32
+; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32
-; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31
-; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30
-; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29
-; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28
-; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27
-; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26
-; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25
-; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24
-; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23
-; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22
-; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21
-; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20
-; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19
-; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18
-; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17
-; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16
-; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15
-; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14
-; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13
-; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12
-; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11
-; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10
-; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9
-; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8
-; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7
-; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6
-; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5
-; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4
-; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3
-; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2
-; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1
-; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v39, 31
+; FLATSCR-NEXT: v_readlane_b32 s101, v39, 30
+; FLATSCR-NEXT: v_readlane_b32 s100, v39, 29
+; FLATSCR-NEXT: v_readlane_b32 s99, v39, 28
+; FLATSCR-NEXT: v_readlane_b32 s98, v39, 27
+; FLATSCR-NEXT: v_readlane_b32 s97, v39, 26
+; FLATSCR-NEXT: v_readlane_b32 s96, v39, 25
+; FLATSCR-NEXT: v_readlane_b32 s87, v39, 24
+; FLATSCR-NEXT: v_readlane_b32 s86, v39, 23
+; FLATSCR-NEXT: v_readlane_b32 s85, v39, 22
+; FLATSCR-NEXT: v_readlane_b32 s84, v39, 21
+; FLATSCR-NEXT: v_readlane_b32 s83, v39, 20
+; FLATSCR-NEXT: v_readlane_b32 s82, v39, 19
+; FLATSCR-NEXT: v_readlane_b32 s81, v39, 18
+; FLATSCR-NEXT: v_readlane_b32 s80, v39, 17
+; FLATSCR-NEXT: v_readlane_b32 s71, v39, 16
+; FLATSCR-NEXT: v_readlane_b32 s70, v39, 15
+; FLATSCR-NEXT: v_readlane_b32 s69, v39, 14
+; FLATSCR-NEXT: v_readlane_b32 s68, v39, 13
+; FLATSCR-NEXT: v_readlane_b32 s67, v39, 12
+; FLATSCR-NEXT: v_readlane_b32 s66, v39, 11
+; FLATSCR-NEXT: v_readlane_b32 s65, v39, 10
+; FLATSCR-NEXT: v_readlane_b32 s64, v39, 9
+; FLATSCR-NEXT: v_readlane_b32 s55, v39, 8
+; FLATSCR-NEXT: v_readlane_b32 s54, v39, 7
+; FLATSCR-NEXT: v_readlane_b32 s53, v39, 6
+; FLATSCR-NEXT: v_readlane_b32 s52, v39, 5
+; FLATSCR-NEXT: v_readlane_b32 s51, v39, 4
+; FLATSCR-NEXT: v_readlane_b32 s50, v39, 3
+; FLATSCR-NEXT: v_readlane_b32 s49, v39, 2
+; FLATSCR-NEXT: v_readlane_b32 s48, v39, 1
+; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v39, off, s33 ; 4-byte Folded Reload
@@ -1531,89 +1513,89 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
; MUBUF-LABEL: callee_need_to_spill_fp_to_reg:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: v_writelane_b32 v40, s46, 0
-; MUBUF-NEXT: v_writelane_b32 v40, s47, 1
-; MUBUF-NEXT: v_writelane_b32 v40, s48, 2
-; MUBUF-NEXT: v_writelane_b32 v40, s49, 3
-; MUBUF-NEXT: v_writelane_b32 v40, s50, 4
-; MUBUF-NEXT: v_writelane_b32 v40, s51, 5
-; MUBUF-NEXT: v_writelane_b32 v40, s52, 6
-; MUBUF-NEXT: v_writelane_b32 v40, s53, 7
-; MUBUF-NEXT: v_writelane_b32 v40, s62, 8
-; MUBUF-NEXT: v_writelane_b32 v40, s63, 9
-; MUBUF-NEXT: v_writelane_b32 v40, s64, 10
-; MUBUF-NEXT: v_writelane_b32 v40, s65, 11
-; MUBUF-NEXT: v_writelane_b32 v40, s66, 12
-; MUBUF-NEXT: v_writelane_b32 v40, s67, 13
-; MUBUF-NEXT: v_writelane_b32 v40, s68, 14
-; MUBUF-NEXT: v_writelane_b32 v40, s69, 15
-; MUBUF-NEXT: v_writelane_b32 v40, s78, 16
-; MUBUF-NEXT: v_writelane_b32 v40, s79, 17
-; MUBUF-NEXT: v_writelane_b32 v40, s80, 18
-; MUBUF-NEXT: v_writelane_b32 v40, s81, 19
-; MUBUF-NEXT: v_writelane_b32 v40, s82, 20
-; MUBUF-NEXT: v_writelane_b32 v40, s83, 21
-; MUBUF-NEXT: v_writelane_b32 v40, s84, 22
-; MUBUF-NEXT: v_writelane_b32 v40, s85, 23
-; MUBUF-NEXT: v_writelane_b32 v40, s94, 24
-; MUBUF-NEXT: v_writelane_b32 v40, s95, 25
-; MUBUF-NEXT: v_writelane_b32 v40, s96, 26
-; MUBUF-NEXT: v_writelane_b32 v40, s97, 27
-; MUBUF-NEXT: v_writelane_b32 v40, s98, 28
-; MUBUF-NEXT: v_writelane_b32 v40, s99, 29
-; MUBUF-NEXT: v_writelane_b32 v40, s100, 30
-; MUBUF-NEXT: v_writelane_b32 v40, s101, 31
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: v_writelane_b32 v40, s4, 32
+; MUBUF-NEXT: v_writelane_b32 v40, s39, 0
+; MUBUF-NEXT: v_writelane_b32 v40, s48, 1
+; MUBUF-NEXT: v_writelane_b32 v40, s49, 2
+; MUBUF-NEXT: v_writelane_b32 v40, s50, 3
+; MUBUF-NEXT: v_writelane_b32 v40, s51, 4
+; MUBUF-NEXT: v_writelane_b32 v40, s52, 5
+; MUBUF-NEXT: v_writelane_b32 v40, s53, 6
+; MUBUF-NEXT: v_writelane_b32 v40, s54, 7
+; MUBUF-NEXT: v_writelane_b32 v40, s55, 8
+; MUBUF-NEXT: v_writelane_b32 v40, s64, 9
+; MUBUF-NEXT: v_writelane_b32 v40, s65, 10
+; MUBUF-NEXT: v_writelane_b32 v40, s66, 11
+; MUBUF-NEXT: v_writelane_b32 v40, s67, 12
+; MUBUF-NEXT: v_writelane_b32 v40, s68, 13
+; MUBUF-NEXT: v_writelane_b32 v40, s69, 14
+; MUBUF-NEXT: v_writelane_b32 v40, s70, 15
+; MUBUF-NEXT: v_writelane_b32 v40, s71, 16
+; MUBUF-NEXT: v_writelane_b32 v40, s80, 17
+; MUBUF-NEXT: v_writelane_b32 v40, s81, 18
+; MUBUF-NEXT: v_writelane_b32 v40, s82, 19
+; MUBUF-NEXT: v_writelane_b32 v40, s83, 20
+; MUBUF-NEXT: v_writelane_b32 v40, s84, 21
+; MUBUF-NEXT: v_writelane_b32 v40, s85, 22
+; MUBUF-NEXT: v_writelane_b32 v40, s86, 23
+; MUBUF-NEXT: v_writelane_b32 v40, s87, 24
+; MUBUF-NEXT: v_writelane_b32 v40, s96, 25
+; MUBUF-NEXT: v_writelane_b32 v40, s97, 26
+; MUBUF-NEXT: v_writelane_b32 v40, s98, 27
+; MUBUF-NEXT: v_writelane_b32 v40, s99, 28
+; MUBUF-NEXT: v_writelane_b32 v40, s100, 29
+; MUBUF-NEXT: v_writelane_b32 v40, s101, 30
; MUBUF-NEXT: s_addk_i32 s32, 0x200
-; MUBUF-NEXT: v_writelane_b32 v40, s102, 32
+; MUBUF-NEXT: v_writelane_b32 v40, s102, 31
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs except CSR v40
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s102, v40, 32
-; MUBUF-NEXT: v_readlane_b32 s101, v40, 31
-; MUBUF-NEXT: v_readlane_b32 s100, v40, 30
-; MUBUF-NEXT: v_readlane_b32 s99, v40, 29
-; MUBUF-NEXT: v_readlane_b32 s98, v40, 28
-; MUBUF-NEXT: v_readlane_b32 s97, v40, 27
-; MUBUF-NEXT: v_readlane_b32 s96, v40, 26
-; MUBUF-NEXT: v_readlane_b32 s95, v40, 25
-; MUBUF-NEXT: v_readlane_b32 s94, v40, 24
-; MUBUF-NEXT: v_readlane_b32 s85, v40, 23
-; MUBUF-NEXT: v_readlane_b32 s84, v40, 22
-; MUBUF-NEXT: v_readlane_b32 s83, v40, 21
-; MUBUF-NEXT: v_readlane_b32 s82, v40, 20
-; MUBUF-NEXT: v_readlane_b32 s81, v40, 19
-; MUBUF-NEXT: v_readlane_b32 s80, v40, 18
-; MUBUF-NEXT: v_readlane_b32 s79, v40, 17
-; MUBUF-NEXT: v_readlane_b32 s78, v40, 16
-; MUBUF-NEXT: v_readlane_b32 s69, v40, 15
-; MUBUF-NEXT: v_readlane_b32 s68, v40, 14
-; MUBUF-NEXT: v_readlane_b32 s67, v40, 13
-; MUBUF-NEXT: v_readlane_b32 s66, v40, 12
-; MUBUF-NEXT: v_readlane_b32 s65, v40, 11
-; MUBUF-NEXT: v_readlane_b32 s64, v40, 10
-; MUBUF-NEXT: v_readlane_b32 s63, v40, 9
-; MUBUF-NEXT: v_readlane_b32 s62, v40, 8
-; MUBUF-NEXT: v_readlane_b32 s53, v40, 7
-; MUBUF-NEXT: v_readlane_b32 s52, v40, 6
-; MUBUF-NEXT: v_readlane_b32 s51, v40, 5
-; MUBUF-NEXT: v_readlane_b32 s50, v40, 4
-; MUBUF-NEXT: v_readlane_b32 s49, v40, 3
-; MUBUF-NEXT: v_readlane_b32 s48, v40, 2
-; MUBUF-NEXT: v_readlane_b32 s47, v40, 1
-; MUBUF-NEXT: v_readlane_b32 s46, v40, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v40, 31
+; MUBUF-NEXT: v_readlane_b32 s101, v40, 30
+; MUBUF-NEXT: v_readlane_b32 s100, v40, 29
+; MUBUF-NEXT: v_readlane_b32 s99, v40, 28
+; MUBUF-NEXT: v_readlane_b32 s98, v40, 27
+; MUBUF-NEXT: v_readlane_b32 s97, v40, 26
+; MUBUF-NEXT: v_readlane_b32 s96, v40, 25
+; MUBUF-NEXT: v_readlane_b32 s87, v40, 24
+; MUBUF-NEXT: v_readlane_b32 s86, v40, 23
+; MUBUF-NEXT: v_readlane_b32 s85, v40, 22
+; MUBUF-NEXT: v_readlane_b32 s84, v40, 21
+; MUBUF-NEXT: v_readlane_b32 s83, v40, 20
+; MUBUF-NEXT: v_readlane_b32 s82, v40, 19
+; MUBUF-NEXT: v_readlane_b32 s81, v40, 18
+; MUBUF-NEXT: v_readlane_b32 s80, v40, 17
+; MUBUF-NEXT: v_readlane_b32 s71, v40, 16
+; MUBUF-NEXT: v_readlane_b32 s70, v40, 15
+; MUBUF-NEXT: v_readlane_b32 s69, v40, 14
+; MUBUF-NEXT: v_readlane_b32 s68, v40, 13
+; MUBUF-NEXT: v_readlane_b32 s67, v40, 12
+; MUBUF-NEXT: v_readlane_b32 s66, v40, 11
+; MUBUF-NEXT: v_readlane_b32 s65, v40, 10
+; MUBUF-NEXT: v_readlane_b32 s64, v40, 9
+; MUBUF-NEXT: v_readlane_b32 s55, v40, 8
+; MUBUF-NEXT: v_readlane_b32 s54, v40, 7
+; MUBUF-NEXT: v_readlane_b32 s53, v40, 6
+; MUBUF-NEXT: v_readlane_b32 s52, v40, 5
+; MUBUF-NEXT: v_readlane_b32 s51, v40, 4
+; MUBUF-NEXT: v_readlane_b32 s50, v40, 3
+; MUBUF-NEXT: v_readlane_b32 s49, v40, 2
+; MUBUF-NEXT: v_readlane_b32 s48, v40, 1
+; MUBUF-NEXT: v_readlane_b32 s39, v40, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1
+; MUBUF-NEXT: v_readlane_b32 s4, v40, 32
+; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@@ -1625,79 +1607,77 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v40, s46, 0
-; FLATSCR-NEXT: v_writelane_b32 v40, s47, 1
-; FLATSCR-NEXT: v_writelane_b32 v40, s48, 2
-; FLATSCR-NEXT: v_writelane_b32 v40, s49, 3
-; FLATSCR-NEXT: v_writelane_b32 v40, s50, 4
-; FLATSCR-NEXT: v_writelane_b32 v40, s51, 5
-; FLATSCR-NEXT: v_writelane_b32 v40, s52, 6
-; FLATSCR-NEXT: v_writelane_b32 v40, s53, 7
-; FLATSCR-NEXT: v_writelane_b32 v40, s62, 8
-; FLATSCR-NEXT: v_writelane_b32 v40, s63, 9
-; FLATSCR-NEXT: v_writelane_b32 v40, s64, 10
-; FLATSCR-NEXT: v_writelane_b32 v40, s65, 11
-; FLATSCR-NEXT: v_writelane_b32 v40, s66, 12
-; FLATSCR-NEXT: v_writelane_b32 v40, s67, 13
-; FLATSCR-NEXT: v_writelane_b32 v40, s68, 14
-; FLATSCR-NEXT: v_writelane_b32 v40, s69, 15
-; FLATSCR-NEXT: v_writelane_b32 v40, s78, 16
-; FLATSCR-NEXT: v_writelane_b32 v40, s79, 17
-; FLATSCR-NEXT: v_writelane_b32 v40, s80, 18
-; FLATSCR-NEXT: v_writelane_b32 v40, s81, 19
-; FLATSCR-NEXT: v_writelane_b32 v40, s82, 20
-; FLATSCR-NEXT: v_writelane_b32 v40, s83, 21
-; FLATSCR-NEXT: v_writelane_b32 v40, s84, 22
-; FLATSCR-NEXT: v_writelane_b32 v40, s85, 23
-; FLATSCR-NEXT: v_writelane_b32 v40, s94, 24
-; FLATSCR-NEXT: v_writelane_b32 v40, s95, 25
-; FLATSCR-NEXT: v_writelane_b32 v40, s96, 26
-; FLATSCR-NEXT: v_writelane_b32 v40, s97, 27
-; FLATSCR-NEXT: v_writelane_b32 v40, s98, 28
-; FLATSCR-NEXT: v_writelane_b32 v40, s99, 29
-; FLATSCR-NEXT: v_writelane_b32 v40, s100, 30
-; FLATSCR-NEXT: v_writelane_b32 v40, s101, 31
+; FLATSCR-NEXT: v_writelane_b32 v40, s39, 0
+; FLATSCR-NEXT: v_writelane_b32 v40, s48, 1
+; FLATSCR-NEXT: v_writelane_b32 v40, s49, 2
+; FLATSCR-NEXT: v_writelane_b32 v40, s50, 3
+; FLATSCR-NEXT: v_writelane_b32 v40, s51, 4
+; FLATSCR-NEXT: v_writelane_b32 v40, s52, 5
+; FLATSCR-NEXT: v_writelane_b32 v40, s53, 6
+; FLATSCR-NEXT: v_writelane_b32 v40, s54, 7
+; FLATSCR-NEXT: v_writelane_b32 v40, s55, 8
+; FLATSCR-NEXT: v_writelane_b32 v40, s64, 9
+; FLATSCR-NEXT: v_writelane_b32 v40, s65, 10
+; FLATSCR-NEXT: v_writelane_b32 v40, s66, 11
+; FLATSCR-NEXT: v_writelane_b32 v40, s67, 12
+; FLATSCR-NEXT: v_writelane_b32 v40, s68, 13
+; FLATSCR-NEXT: v_writelane_b32 v40, s69, 14
+; FLATSCR-NEXT: v_writelane_b32 v40, s70, 15
+; FLATSCR-NEXT: v_writelane_b32 v40, s71, 16
+; FLATSCR-NEXT: v_writelane_b32 v40, s80, 17
+; FLATSCR-NEXT: v_writelane_b32 v40, s81, 18
+; FLATSCR-NEXT: v_writelane_b32 v40, s82, 19
+; FLATSCR-NEXT: v_writelane_b32 v40, s83, 20
+; FLATSCR-NEXT: v_writelane_b32 v40, s84, 21
+; FLATSCR-NEXT: v_writelane_b32 v40, s85, 22
+; FLATSCR-NEXT: v_writelane_b32 v40, s86, 23
+; FLATSCR-NEXT: v_writelane_b32 v40, s87, 24
+; FLATSCR-NEXT: v_writelane_b32 v40, s96, 25
+; FLATSCR-NEXT: v_writelane_b32 v40, s97, 26
+; FLATSCR-NEXT: v_writelane_b32 v40, s98, 27
+; FLATSCR-NEXT: v_writelane_b32 v40, s99, 28
+; FLATSCR-NEXT: v_writelane_b32 v40, s100, 29
+; FLATSCR-NEXT: v_writelane_b32 v40, s101, 30
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
-; FLATSCR-NEXT: v_writelane_b32 v40, s102, 32
+; FLATSCR-NEXT: v_writelane_b32 v40, s102, 31
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s102, v40, 32
-; FLATSCR-NEXT: v_readlane_b32 s101, v40, 31
-; FLATSCR-NEXT: v_readlane_b32 s100, v40, 30
-; FLATSCR-NEXT: v_readlane_b32 s99, v40, 29
-; FLATSCR-NEXT: v_readlane_b32 s98, v40, 28
-; FLATSCR-NEXT: v_readlane_b32 s97, v40, 27
-; FLATSCR-NEXT: v_readlane_b32 s96, v40, 26
-; FLATSCR-NEXT: v_readlane_b32 s95, v40, 25
-; FLATSCR-NEXT: v_readlane_b32 s94, v40, 24
-; FLATSCR-NEXT: v_readlane_b32 s85, v40, 23
-; FLATSCR-NEXT: v_readlane_b32 s84, v40, 22
-; FLATSCR-NEXT: v_readlane_b32 s83, v40, 21
-; FLATSCR-NEXT: v_readlane_b32 s82, v40, 20
-; FLATSCR-NEXT: v_readlane_b32 s81, v40, 19
-; FLATSCR-NEXT: v_readlane_b32 s80, v40, 18
-; FLATSCR-NEXT: v_readlane_b32 s79, v40, 17
-; FLATSCR-NEXT: v_readlane_b32 s78, v40, 16
-; FLATSCR-NEXT: v_readlane_b32 s69, v40, 15
-; FLATSCR-NEXT: v_readlane_b32 s68, v40, 14
-; FLATSCR-NEXT: v_readlane_b32 s67, v40, 13
-; FLATSCR-NEXT: v_readlane_b32 s66, v40, 12
-; FLATSCR-NEXT: v_readlane_b32 s65, v40, 11
-; FLATSCR-NEXT: v_readlane_b32 s64, v40, 10
-; FLATSCR-NEXT: v_readlane_b32 s63, v40, 9
-; FLATSCR-NEXT: v_readlane_b32 s62, v40, 8
-; FLATSCR-NEXT: v_readlane_b32 s53, v40, 7
-; FLATSCR-NEXT: v_readlane_b32 s52, v40, 6
-; FLATSCR-NEXT: v_readlane_b32 s51, v40, 5
-; FLATSCR-NEXT: v_readlane_b32 s50, v40, 4
-; FLATSCR-NEXT: v_readlane_b32 s49, v40, 3
-; FLATSCR-NEXT: v_readlane_b32 s48, v40, 2
-; FLATSCR-NEXT: v_readlane_b32 s47, v40, 1
-; FLATSCR-NEXT: v_readlane_b32 s46, v40, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v40, 31
+; FLATSCR-NEXT: v_readlane_b32 s101, v40, 30
+; FLATSCR-NEXT: v_readlane_b32 s100, v40, 29
+; FLATSCR-NEXT: v_readlane_b32 s99, v40, 28
+; FLATSCR-NEXT: v_readlane_b32 s98, v40, 27
+; FLATSCR-NEXT: v_readlane_b32 s97, v40, 26
+; FLATSCR-NEXT: v_readlane_b32 s96, v40, 25
+; FLATSCR-NEXT: v_readlane_b32 s87, v40, 24
+; FLATSCR-NEXT: v_readlane_b32 s86, v40, 23
+; FLATSCR-NEXT: v_readlane_b32 s85, v40, 22
+; FLATSCR-NEXT: v_readlane_b32 s84, v40, 21
+; FLATSCR-NEXT: v_readlane_b32 s83, v40, 20
+; FLATSCR-NEXT: v_readlane_b32 s82, v40, 19
+; FLATSCR-NEXT: v_readlane_b32 s81, v40, 18
+; FLATSCR-NEXT: v_readlane_b32 s80, v40, 17
+; FLATSCR-NEXT: v_readlane_b32 s71, v40, 16
+; FLATSCR-NEXT: v_readlane_b32 s70, v40, 15
+; FLATSCR-NEXT: v_readlane_b32 s69, v40, 14
+; FLATSCR-NEXT: v_readlane_b32 s68, v40, 13
+; FLATSCR-NEXT: v_readlane_b32 s67, v40, 12
+; FLATSCR-NEXT: v_readlane_b32 s66, v40, 11
+; FLATSCR-NEXT: v_readlane_b32 s65, v40, 10
+; FLATSCR-NEXT: v_readlane_b32 s64, v40, 9
+; FLATSCR-NEXT: v_readlane_b32 s55, v40, 8
+; FLATSCR-NEXT: v_readlane_b32 s54, v40, 7
+; FLATSCR-NEXT: v_readlane_b32 s53, v40, 6
+; FLATSCR-NEXT: v_readlane_b32 s52, v40, 5
+; FLATSCR-NEXT: v_readlane_b32 s51, v40, 4
+; FLATSCR-NEXT: v_readlane_b32 s50, v40, 3
+; FLATSCR-NEXT: v_readlane_b32 s49, v40, 2
+; FLATSCR-NEXT: v_readlane_b32 s48, v40, 1
+; FLATSCR-NEXT: v_readlane_b32 s39, v40, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
@@ -1731,48 +1711,48 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; MUBUF-LABEL: spill_fp_to_memory_scratch_reg_needed_mubuf_offset:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; MUBUF-NEXT: s_mov_b32 s38, s33
+; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
-; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
-; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: v_writelane_b32 v39, s46, 0
-; MUBUF-NEXT: v_writelane_b32 v39, s47, 1
-; MUBUF-NEXT: v_writelane_b32 v39, s48, 2
-; MUBUF-NEXT: v_writelane_b32 v39, s49, 3
-; MUBUF-NEXT: v_writelane_b32 v39, s50, 4
-; MUBUF-NEXT: v_writelane_b32 v39, s51, 5
-; MUBUF-NEXT: v_writelane_b32 v39, s52, 6
-; MUBUF-NEXT: v_writelane_b32 v39, s53, 7
-; MUBUF-NEXT: v_writelane_b32 v39, s62, 8
-; MUBUF-NEXT: v_writelane_b32 v39, s63, 9
-; MUBUF-NEXT: v_writelane_b32 v39, s64, 10
-; MUBUF-NEXT: v_writelane_b32 v39, s65, 11
-; MUBUF-NEXT: v_writelane_b32 v39, s66, 12
-; MUBUF-NEXT: v_writelane_b32 v39, s67, 13
-; MUBUF-NEXT: v_writelane_b32 v39, s68, 14
-; MUBUF-NEXT: v_writelane_b32 v39, s69, 15
-; MUBUF-NEXT: v_writelane_b32 v39, s78, 16
-; MUBUF-NEXT: v_writelane_b32 v39, s79, 17
-; MUBUF-NEXT: v_writelane_b32 v39, s80, 18
-; MUBUF-NEXT: v_writelane_b32 v39, s81, 19
-; MUBUF-NEXT: v_writelane_b32 v39, s82, 20
-; MUBUF-NEXT: v_writelane_b32 v39, s83, 21
-; MUBUF-NEXT: v_writelane_b32 v39, s84, 22
-; MUBUF-NEXT: v_writelane_b32 v39, s85, 23
-; MUBUF-NEXT: v_writelane_b32 v39, s94, 24
-; MUBUF-NEXT: v_writelane_b32 v39, s95, 25
-; MUBUF-NEXT: v_writelane_b32 v39, s96, 26
-; MUBUF-NEXT: v_writelane_b32 v39, s97, 27
-; MUBUF-NEXT: v_writelane_b32 v39, s98, 28
-; MUBUF-NEXT: v_writelane_b32 v39, s99, 29
-; MUBUF-NEXT: v_writelane_b32 v39, s100, 30
-; MUBUF-NEXT: v_writelane_b32 v39, s101, 31
+; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100
+; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: v_writelane_b32 v39, s4, 32
+; MUBUF-NEXT: v_writelane_b32 v39, s39, 0
+; MUBUF-NEXT: v_writelane_b32 v39, s48, 1
+; MUBUF-NEXT: v_writelane_b32 v39, s49, 2
+; MUBUF-NEXT: v_writelane_b32 v39, s50, 3
+; MUBUF-NEXT: v_writelane_b32 v39, s51, 4
+; MUBUF-NEXT: v_writelane_b32 v39, s52, 5
+; MUBUF-NEXT: v_writelane_b32 v39, s53, 6
+; MUBUF-NEXT: v_writelane_b32 v39, s54, 7
+; MUBUF-NEXT: v_writelane_b32 v39, s55, 8
+; MUBUF-NEXT: v_writelane_b32 v39, s64, 9
+; MUBUF-NEXT: v_writelane_b32 v39, s65, 10
+; MUBUF-NEXT: v_writelane_b32 v39, s66, 11
+; MUBUF-NEXT: v_writelane_b32 v39, s67, 12
+; MUBUF-NEXT: v_writelane_b32 v39, s68, 13
+; MUBUF-NEXT: v_writelane_b32 v39, s69, 14
+; MUBUF-NEXT: v_writelane_b32 v39, s70, 15
+; MUBUF-NEXT: v_writelane_b32 v39, s71, 16
+; MUBUF-NEXT: v_writelane_b32 v39, s80, 17
+; MUBUF-NEXT: v_writelane_b32 v39, s81, 18
+; MUBUF-NEXT: v_writelane_b32 v39, s82, 19
+; MUBUF-NEXT: v_writelane_b32 v39, s83, 20
+; MUBUF-NEXT: v_writelane_b32 v39, s84, 21
+; MUBUF-NEXT: v_writelane_b32 v39, s85, 22
+; MUBUF-NEXT: v_writelane_b32 v39, s86, 23
+; MUBUF-NEXT: v_writelane_b32 v39, s87, 24
+; MUBUF-NEXT: v_writelane_b32 v39, s96, 25
+; MUBUF-NEXT: v_writelane_b32 v39, s97, 26
+; MUBUF-NEXT: v_writelane_b32 v39, s98, 27
+; MUBUF-NEXT: v_writelane_b32 v39, s99, 28
+; MUBUF-NEXT: v_writelane_b32 v39, s100, 29
+; MUBUF-NEXT: v_writelane_b32 v39, s101, 30
; MUBUF-NEXT: v_mov_b32_e32 v0, 0
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000
; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300
-; MUBUF-NEXT: v_writelane_b32 v39, s102, 32
+; MUBUF-NEXT: v_writelane_b32 v39, s102, 31
; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
@@ -1781,45 +1761,45 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber all VGPRs except CSR v40
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: v_readlane_b32 s102, v39, 32
-; MUBUF-NEXT: v_readlane_b32 s101, v39, 31
-; MUBUF-NEXT: v_readlane_b32 s100, v39, 30
-; MUBUF-NEXT: v_readlane_b32 s99, v39, 29
-; MUBUF-NEXT: v_readlane_b32 s98, v39, 28
-; MUBUF-NEXT: v_readlane_b32 s97, v39, 27
-; MUBUF-NEXT: v_readlane_b32 s96, v39, 26
-; MUBUF-NEXT: v_readlane_b32 s95, v39, 25
-; MUBUF-NEXT: v_readlane_b32 s94, v39, 24
-; MUBUF-NEXT: v_readlane_b32 s85, v39, 23
-; MUBUF-NEXT: v_readlane_b32 s84, v39, 22
-; MUBUF-NEXT: v_readlane_b32 s83, v39, 21
-; MUBUF-NEXT: v_readlane_b32 s82, v39, 20
-; MUBUF-NEXT: v_readlane_b32 s81, v39, 19
-; MUBUF-NEXT: v_readlane_b32 s80, v39, 18
-; MUBUF-NEXT: v_readlane_b32 s79, v39, 17
-; MUBUF-NEXT: v_readlane_b32 s78, v39, 16
-; MUBUF-NEXT: v_readlane_b32 s69, v39, 15
-; MUBUF-NEXT: v_readlane_b32 s68, v39, 14
-; MUBUF-NEXT: v_readlane_b32 s67, v39, 13
-; MUBUF-NEXT: v_readlane_b32 s66, v39, 12
-; MUBUF-NEXT: v_readlane_b32 s65, v39, 11
-; MUBUF-NEXT: v_readlane_b32 s64, v39, 10
-; MUBUF-NEXT: v_readlane_b32 s63, v39, 9
-; MUBUF-NEXT: v_readlane_b32 s62, v39, 8
-; MUBUF-NEXT: v_readlane_b32 s53, v39, 7
-; MUBUF-NEXT: v_readlane_b32 s52, v39, 6
-; MUBUF-NEXT: v_readlane_b32 s51, v39, 5
-; MUBUF-NEXT: v_readlane_b32 s50, v39, 4
-; MUBUF-NEXT: v_readlane_b32 s49, v39, 3
-; MUBUF-NEXT: v_readlane_b32 s48, v39, 2
-; MUBUF-NEXT: v_readlane_b32 s47, v39, 1
-; MUBUF-NEXT: v_readlane_b32 s46, v39, 0
+; MUBUF-NEXT: v_readlane_b32 s102, v39, 31
+; MUBUF-NEXT: v_readlane_b32 s101, v39, 30
+; MUBUF-NEXT: v_readlane_b32 s100, v39, 29
+; MUBUF-NEXT: v_readlane_b32 s99, v39, 28
+; MUBUF-NEXT: v_readlane_b32 s98, v39, 27
+; MUBUF-NEXT: v_readlane_b32 s97, v39, 26
+; MUBUF-NEXT: v_readlane_b32 s96, v39, 25
+; MUBUF-NEXT: v_readlane_b32 s87, v39, 24
+; MUBUF-NEXT: v_readlane_b32 s86, v39, 23
+; MUBUF-NEXT: v_readlane_b32 s85, v39, 22
+; MUBUF-NEXT: v_readlane_b32 s84, v39, 21
+; MUBUF-NEXT: v_readlane_b32 s83, v39, 20
+; MUBUF-NEXT: v_readlane_b32 s82, v39, 19
+; MUBUF-NEXT: v_readlane_b32 s81, v39, 18
+; MUBUF-NEXT: v_readlane_b32 s80, v39, 17
+; MUBUF-NEXT: v_readlane_b32 s71, v39, 16
+; MUBUF-NEXT: v_readlane_b32 s70, v39, 15
+; MUBUF-NEXT: v_readlane_b32 s69, v39, 14
+; MUBUF-NEXT: v_readlane_b32 s68, v39, 13
+; MUBUF-NEXT: v_readlane_b32 s67, v39, 12
+; MUBUF-NEXT: v_readlane_b32 s66, v39, 11
+; MUBUF-NEXT: v_readlane_b32 s65, v39, 10
+; MUBUF-NEXT: v_readlane_b32 s64, v39, 9
+; MUBUF-NEXT: v_readlane_b32 s55, v39, 8
+; MUBUF-NEXT: v_readlane_b32 s54, v39, 7
+; MUBUF-NEXT: v_readlane_b32 s53, v39, 6
+; MUBUF-NEXT: v_readlane_b32 s52, v39, 5
+; MUBUF-NEXT: v_readlane_b32 s51, v39, 4
+; MUBUF-NEXT: v_readlane_b32 s50, v39, 3
+; MUBUF-NEXT: v_readlane_b32 s49, v39, 2
+; MUBUF-NEXT: v_readlane_b32 s48, v39, 1
+; MUBUF-NEXT: v_readlane_b32 s39, v39, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
-; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100
-; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s6 ; 4-byte Folded Reload
-; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
-; MUBUF-NEXT: s_mov_b32 s33, s38
+; MUBUF-NEXT: v_readlane_b32 s4, v39, 32
+; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1
+; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100
+; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s5 ; 4-byte Folded Reload
+; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
+; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
@@ -1832,42 +1812,41 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004
; FLATSCR-NEXT: scratch_store_dword off, v39, s1 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
-; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0
-; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1
-; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2
-; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3
-; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4
-; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5
-; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6
-; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7
-; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8
-; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9
-; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10
-; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11
-; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12
-; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13
-; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14
-; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15
-; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16
-; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17
-; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18
-; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19
-; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20
-; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21
-; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22
-; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23
-; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24
-; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25
-; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26
-; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27
-; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28
-; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29
+; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0
+; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1
+; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2
+; FLATSCR-NEXT: v_writelane_b32 v39, s50, 3
+; FLATSCR-NEXT: v_writelane_b32 v39, s51, 4
+; FLATSCR-NEXT: v_writelane_b32 v39, s52, 5
+; FLATSCR-NEXT: v_writelane_b32 v39, s53, 6
+; FLATSCR-NEXT: v_writelane_b32 v39, s54, 7
+; FLATSCR-NEXT: v_writelane_b32 v39, s55, 8
+; FLATSCR-NEXT: v_writelane_b32 v39, s64, 9
+; FLATSCR-NEXT: v_writelane_b32 v39, s65, 10
+; FLATSCR-NEXT: v_writelane_b32 v39, s66, 11
+; FLATSCR-NEXT: v_writelane_b32 v39, s67, 12
+; FLATSCR-NEXT: v_writelane_b32 v39, s68, 13
+; FLATSCR-NEXT: v_writelane_b32 v39, s69, 14
+; FLATSCR-NEXT: v_writelane_b32 v39, s70, 15
+; FLATSCR-NEXT: v_writelane_b32 v39, s71, 16
+; FLATSCR-NEXT: v_writelane_b32 v39, s80, 17
+; FLATSCR-NEXT: v_writelane_b32 v39, s81, 18
+; FLATSCR-NEXT: v_writelane_b32 v39, s82, 19
+; FLATSCR-NEXT: v_writelane_b32 v39, s83, 20
+; FLATSCR-NEXT: v_writelane_b32 v39, s84, 21
+; FLATSCR-NEXT: v_writelane_b32 v39, s85, 22
+; FLATSCR-NEXT: v_writelane_b32 v39, s86, 23
+; FLATSCR-NEXT: v_writelane_b32 v39, s87, 24
+; FLATSCR-NEXT: v_writelane_b32 v39, s96, 25
+; FLATSCR-NEXT: v_writelane_b32 v39, s97, 26
+; FLATSCR-NEXT: v_writelane_b32 v39, s98, 27
+; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28
; FLATSCR-NEXT: s_addk_i32 s32, 0x100c
-; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30
-; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31
+; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29
+; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30
; FLATSCR-NEXT: v_mov_b32_e32 v0, 0
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000
-; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32
+; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31
; FLATSCR-NEXT: scratch_store_dword off, v0, s1
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@@ -1876,39 +1855,38 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5)
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32
-; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31
-; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30
-; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29
-; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28
-; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27
-; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26
-; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25
-; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24
-; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23
-; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22
-; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21
-; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20
-; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19
-; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18
-; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17
-; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16
-; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15
-; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14
-; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13
-; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12
-; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11
-; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10
-; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9
-; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8
-; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7
-; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6
-; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5
-; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4
-; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3
-; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2
-; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1
-; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0
+; FLATSCR-NEXT: v_readlane_b32 s102, v39, 31
+; FLATSCR-NEXT: v_readlane_b32 s101, v39, 30
+; FLATSCR-NEXT: v_readlane_b32 s100, v39, 29
+; FLATSCR-NEXT: v_readlane_b32 s99, v39, 28
+; FLATSCR-NEXT: v_readlane_b32 s98, v39, 27
+; FLATSCR-NEXT: v_readlane_b32 s97, v39, 26
+; FLATSCR-NEXT: v_readlane_b32 s96, v39, 25
+; FLATSCR-NEXT: v_readlane_b32 s87, v39, 24
+; FLATSCR-NEXT: v_readlane_b32 s86, v39, 23
+; FLATSCR-NEXT: v_readlane_b32 s85, v39, 22
+; FLATSCR-NEXT: v_readlane_b32 s84, v39, 21
+; FLATSCR-NEXT: v_readlane_b32 s83, v39, 20
+; FLATSCR-NEXT: v_readlane_b32 s82, v39, 19
+; FLATSCR-NEXT: v_readlane_b32 s81, v39, 18
+; FLATSCR-NEXT: v_readlane_b32 s80, v39, 17
+; FLATSCR-NEXT: v_readlane_b32 s71, v39, 16
+; FLATSCR-NEXT: v_readlane_b32 s70, v39, 15
+; FLATSCR-NEXT: v_readlane_b32 s69, v39, 14
+; FLATSCR-NEXT: v_readlane_b32 s68, v39, 13
+; FLATSCR-NEXT: v_readlane_b32 s67, v39, 12
+; FLATSCR-NEXT: v_readlane_b32 s66, v39, 11
+; FLATSCR-NEXT: v_readlane_b32 s65, v39, 10
+; FLATSCR-NEXT: v_readlane_b32 s64, v39, 9
+; FLATSCR-NEXT: v_readlane_b32 s55, v39, 8
+; FLATSCR-NEXT: v_readlane_b32 s54, v39, 7
+; FLATSCR-NEXT: v_readlane_b32 s53, v39, 6
+; FLATSCR-NEXT: v_readlane_b32 s52, v39, 5
+; FLATSCR-NEXT: v_readlane_b32 s51, v39, 4
+; FLATSCR-NEXT: v_readlane_b32 s50, v39, 3
+; FLATSCR-NEXT: v_readlane_b32 s49, v39, 2
+; FLATSCR-NEXT: v_readlane_b32 s48, v39, 1
+; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004
diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
index 0a3bf35427e24..6504f48333485 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
@@ -14,13 +14,7 @@ body: |
; CHECK-LABEL: name: def_csr_sgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr46, $sgpr47
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 0, $vgpr0
- ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 1, $vgpr0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 4c2e3f426d29f..9b91a3dc9b6e4 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1321,19 +1321,19 @@ bb:
define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspace(3) %arg) {
; CI-LABEL: ds_read_call_read:
; CI: ; %bb.0:
-; CI-NEXT: s_getpc_b64 s[64:65]
-; CI-NEXT: s_mov_b32 s64, s0
-; CI-NEXT: s_load_dwordx4 s[64:67], s[64:65], 0x0
+; CI-NEXT: s_getpc_b64 s[48:49]
+; CI-NEXT: s_mov_b32 s48, s0
+; CI-NEXT: s_load_dwordx4 s[48:51], s[48:49], 0x0
; CI-NEXT: s_mov_b32 s14, s10
; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: s_mov_b32 s12, s8
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: s_add_u32 s64, s64, s11
+; CI-NEXT: s_add_u32 s48, s48, s11
; CI-NEXT: s_mov_b64 s[10:11], s[6:7]
-; CI-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x0
+; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0
; CI-NEXT: s_load_dword s6, s[4:5], 0x2
-; CI-NEXT: s_addc_u32 s65, s65, 0
+; CI-NEXT: s_addc_u32 s49, s49, 0
; CI-NEXT: s_add_u32 s8, s4, 12
; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; CI-NEXT: s_mov_b32 s13, s9
@@ -1345,36 +1345,36 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; CI-NEXT: v_or_b32_e32 v0, v0, v1
; CI-NEXT: s_mov_b64 s[4:5], s[0:1]
; CI-NEXT: s_mov_b64 s[6:7], s[2:3]
-; CI-NEXT: s_mov_b64 s[0:1], s[64:65]
+; CI-NEXT: s_mov_b64 s[0:1], s[48:49]
; CI-NEXT: s_mov_b32 s17, void_func_void@abs32@hi
; CI-NEXT: s_mov_b32 s16, void_func_void@abs32@lo
; CI-NEXT: v_or_b32_e32 v31, v0, v2
-; CI-NEXT: s_mov_b64 s[2:3], s[66:67]
+; CI-NEXT: s_mov_b64 s[2:3], s[50:51]
; CI-NEXT: s_mov_b32 s32, 0
-; CI-NEXT: s_mov_b32 s51, 0xf000
-; CI-NEXT: s_mov_b32 s50, -1
+; CI-NEXT: s_mov_b32 s39, 0xf000
+; CI-NEXT: s_mov_b32 s38, -1
; CI-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CI-NEXT: ds_read_b32 v0, v40 offset:4
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, v41, v0
-; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0
+; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0
; CI-NEXT: s_endpgm
;
; GFX9-LABEL: ds_read_call_read:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_getpc_b64 s[48:49]
-; GFX9-NEXT: s_mov_b32 s48, s0
-; GFX9-NEXT: s_load_dwordx4 s[48:51], s[48:49], 0x0
+; GFX9-NEXT: s_getpc_b64 s[36:37]
+; GFX9-NEXT: s_mov_b32 s36, s0
+; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0
; GFX9-NEXT: s_mov_b32 s14, s10
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_add_u32 s36, s36, s11
; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_add_u32 s8, s4, 12
; GFX9-NEXT: s_addc_u32 s9, s5, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1383,11 +1383,11 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: s_mov_b32 s17, void_func_void@abs32@hi
; GFX9-NEXT: s_mov_b32 s16, void_func_void@abs32@lo
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: v_mov_b32_e32 v40, 0
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
index f671ea5f10cd8..40cdfd76d6af6 100644
--- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
@@ -28,8 +28,8 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: v_writelane_b32 v41, s35, 3
; CHECK-NEXT: v_writelane_b32 v41, s36, 4
; CHECK-NEXT: v_writelane_b32 v41, s37, 5
-; CHECK-NEXT: v_writelane_b32 v41, s46, 6
-; CHECK-NEXT: v_writelane_b32 v41, s47, 7
+; CHECK-NEXT: v_writelane_b32 v41, s38, 6
+; CHECK-NEXT: v_writelane_b32 v41, s39, 7
; CHECK-NEXT: v_writelane_b32 v41, s48, 8
; CHECK-NEXT: v_writelane_b32 v41, s49, 9
; CHECK-NEXT: v_writelane_b32 v41, s50, 10
@@ -37,7 +37,7 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: v_writelane_b32 v41, s52, 12
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: v_writelane_b32 v41, s53, 13
-; CHECK-NEXT: v_writelane_b32 v41, s62, 14
+; CHECK-NEXT: v_writelane_b32 v41, s54, 14
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef
; CHECK-NEXT: .Ltmp0:
@@ -45,8 +45,8 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12
-; CHECK-NEXT: v_writelane_b32 v41, s63, 15
-; CHECK-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0
+; CHECK-NEXT: v_writelane_b32 v41, s55, 15
+; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v40, v31
@@ -56,11 +56,11 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_mov_b32 s53, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s53
@@ -68,23 +68,23 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_mov_b32 s14, s51
; CHECK-NEXT: s_mov_b32 s15, s50
; CHECK-NEXT: v_mov_b32_e32 v31, v40
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- [$vgpr0_vgpr1+0]
; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: flat_store_dword v[0:1], v2
-; CHECK-NEXT: v_readlane_b32 s63, v41, 15
-; CHECK-NEXT: v_readlane_b32 s62, v41, 14
+; CHECK-NEXT: v_readlane_b32 s55, v41, 15
+; CHECK-NEXT: v_readlane_b32 s54, v41, 14
; CHECK-NEXT: v_readlane_b32 s53, v41, 13
; CHECK-NEXT: v_readlane_b32 s52, v41, 12
; CHECK-NEXT: v_readlane_b32 s51, v41, 11
; CHECK-NEXT: v_readlane_b32 s50, v41, 10
; CHECK-NEXT: v_readlane_b32 s49, v41, 9
; CHECK-NEXT: v_readlane_b32 s48, v41, 8
-; CHECK-NEXT: v_readlane_b32 s47, v41, 7
-; CHECK-NEXT: v_readlane_b32 s46, v41, 6
+; CHECK-NEXT: v_readlane_b32 s39, v41, 7
+; CHECK-NEXT: v_readlane_b32 s38, v41, 6
; CHECK-NEXT: v_readlane_b32 s37, v41, 5
; CHECK-NEXT: v_readlane_b32 s36, v41, 4
; CHECK-NEXT: v_readlane_b32 s35, v41, 3
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
index 4dd03a17f7caa..7f370b2cca658 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
@@ -142,8 +142,8 @@ body: |
; GFX1100-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22
; GFX1100-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; GFX1100-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; GFX1100-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
- ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38
+ ; GFX1100-NEXT: $sgpr40 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
+ ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr40
; GFX1100-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
@@ -167,8 +167,8 @@ body: |
; GFX1200-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22
; GFX1200-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
; GFX1200-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
- ; GFX1200-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
- ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38
+ ; GFX1200-NEXT: $sgpr40 = S_ADD_I32 $sgpr32, 8, implicit-def $scc
+ ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr40
; GFX1200-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
@@ -706,7 +706,7 @@ body: |
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX8-NEXT: $sgpr4 = S_MOV_B32 24
- ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+ ; GFX8-NEXT: $vgpr0, dead $sgpr72_sgpr73 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -809,10 +809,10 @@ body: |
; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
- ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
- ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
- ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
+ ; GFX1100-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
+ ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc
+ ; GFX1100-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72
+ ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72
; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
@@ -836,10 +836,10 @@ body: |
; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
- ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
- ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
- ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
+ ; GFX1200-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc
+ ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc
+ ; GFX1200-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72
+ ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72
; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
@@ -901,7 +901,7 @@ body: |
; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX8-NEXT: $sgpr4 = S_MOV_B32 68
- ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
+ ; GFX8-NEXT: $vgpr0, dead $sgpr72_sgpr73 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -1004,10 +1004,10 @@ body: |
; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
- ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
- ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
- ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
+ ; GFX1100-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
+ ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc
+ ; GFX1100-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72
+ ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72
; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
@@ -1031,10 +1031,10 @@ body: |
; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec
; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
- ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
- ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc
- ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70
- ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70
+ ; GFX1200-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc
+ ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc
+ ; GFX1200-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72
+ ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72
; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
index ff2fb986e7828..dc20ae3765069 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
@@ -2060,9 +2060,9 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s29, s33
; GFX9-NEXT: s_mov_b32 s33, s32
-; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[38:39]
+; GFX9-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-NEXT: v_mov_b32_e32 v2, s28
; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48
; GFX9-NEXT: v_mov_b32_e32 v5, s27
diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
index 3e84aa37fbcaa..512d58d3f996d 100644
--- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
@@ -202,18 +202,18 @@ define void @indirect_use_50_vgpr() #0 {
}
; GCN-LABEL: {{^}}use_80_sgpr:
-; GCN: .set use_80_sgpr.num_vgpr, 1
+; GCN: .set use_80_sgpr.num_vgpr, 0
; GCN: .set use_80_sgpr.num_agpr, 0
; GCN: .set use_80_sgpr.numbered_sgpr, 80
-; GCN: .set use_80_sgpr.private_seg_size, 8
+; GCN: .set use_80_sgpr.private_seg_size, 0
; GCN: .set use_80_sgpr.uses_vcc, 0
; GCN: .set use_80_sgpr.uses_flat_scratch, 0
; GCN: .set use_80_sgpr.has_dyn_sized_stack, 0
; GCN: .set use_80_sgpr.has_recursion, 0
; GCN: .set use_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
-; GCN: NumVgprs: 1
-; GCN: ScratchSize: 8
+; GCN: NumVgprs: 0
+; GCN: ScratchSize: 0
define void @use_80_sgpr() #1 {
call void asm sideeffect "", "~{s79}"() #0
ret void
@@ -231,7 +231,7 @@ define void @use_80_sgpr() #1 {
; GCN: .set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
-; GCN: ScratchSize: 24
+; GCN: ScratchSize: 16
define void @indirect_use_80_sgpr() #1 {
call void @use_80_sgpr()
ret void
@@ -249,7 +249,7 @@ define void @indirect_use_80_sgpr() #1 {
; GCN: .set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
-; GCN: ScratchSize: 24
+; GCN: ScratchSize: 16
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
call void @indirect_use_80_sgpr()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
index be12d4be59106..8ac187eacf1fe 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll
@@ -41,43 +41,43 @@ define amdgpu_gfx void @gfx_func() {
; SDAG-NEXT: v_writelane_b32 v40, s29, 25
; SDAG-NEXT: v_writelane_b32 v40, s30, 26
; SDAG-NEXT: v_writelane_b32 v40, s31, 27
-; SDAG-NEXT: v_writelane_b32 v40, s70, 28
-; SDAG-NEXT: v_writelane_b32 v40, s71, 29
-; SDAG-NEXT: v_writelane_b32 v40, s72, 30
-; SDAG-NEXT: v_writelane_b32 v40, s73, 31
-; SDAG-NEXT: v_writelane_b32 v40, s74, 32
-; SDAG-NEXT: v_writelane_b32 v40, s75, 33
-; SDAG-NEXT: v_writelane_b32 v40, s76, 34
-; SDAG-NEXT: v_writelane_b32 v40, s77, 35
-; SDAG-NEXT: v_writelane_b32 v40, s86, 36
-; SDAG-NEXT: v_writelane_b32 v40, s87, 37
-; SDAG-NEXT: v_writelane_b32 v40, s88, 38
-; SDAG-NEXT: v_writelane_b32 v40, s89, 39
-; SDAG-NEXT: v_writelane_b32 v40, s90, 40
-; SDAG-NEXT: v_writelane_b32 v40, s91, 41
-; SDAG-NEXT: v_writelane_b32 v40, s92, 42
+; SDAG-NEXT: v_writelane_b32 v40, s72, 28
+; SDAG-NEXT: v_writelane_b32 v40, s73, 29
+; SDAG-NEXT: v_writelane_b32 v40, s74, 30
+; SDAG-NEXT: v_writelane_b32 v40, s75, 31
+; SDAG-NEXT: v_writelane_b32 v40, s76, 32
+; SDAG-NEXT: v_writelane_b32 v40, s77, 33
+; SDAG-NEXT: v_writelane_b32 v40, s78, 34
+; SDAG-NEXT: v_writelane_b32 v40, s79, 35
+; SDAG-NEXT: v_writelane_b32 v40, s88, 36
+; SDAG-NEXT: v_writelane_b32 v40, s89, 37
+; SDAG-NEXT: v_writelane_b32 v40, s90, 38
+; SDAG-NEXT: v_writelane_b32 v40, s91, 39
+; SDAG-NEXT: v_writelane_b32 v40, s92, 40
+; SDAG-NEXT: v_writelane_b32 v40, s93, 41
+; SDAG-NEXT: v_writelane_b32 v40, s94, 42
; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi
; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo
; SDAG-NEXT: s_mov_b64 s[8:9], 0
; SDAG-NEXT: s_addk_i32 s32, 0x400
-; SDAG-NEXT: v_writelane_b32 v40, s93, 43
+; SDAG-NEXT: v_writelane_b32 v40, s95, 43
; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35]
-; SDAG-NEXT: v_readlane_b32 s93, v40, 43
-; SDAG-NEXT: v_readlane_b32 s92, v40, 42
-; SDAG-NEXT: v_readlane_b32 s91, v40, 41
-; SDAG-NEXT: v_readlane_b32 s90, v40, 40
-; SDAG-NEXT: v_readlane_b32 s89, v40, 39
-; SDAG-NEXT: v_readlane_b32 s88, v40, 38
-; SDAG-NEXT: v_readlane_b32 s87, v40, 37
-; SDAG-NEXT: v_readlane_b32 s86, v40, 36
-; SDAG-NEXT: v_readlane_b32 s77, v40, 35
-; SDAG-NEXT: v_readlane_b32 s76, v40, 34
-; SDAG-NEXT: v_readlane_b32 s75, v40, 33
-; SDAG-NEXT: v_readlane_b32 s74, v40, 32
-; SDAG-NEXT: v_readlane_b32 s73, v40, 31
-; SDAG-NEXT: v_readlane_b32 s72, v40, 30
-; SDAG-NEXT: v_readlane_b32 s71, v40, 29
-; SDAG-NEXT: v_readlane_b32 s70, v40, 28
+; SDAG-NEXT: v_readlane_b32 s95, v40, 43
+; SDAG-NEXT: v_readlane_b32 s94, v40, 42
+; SDAG-NEXT: v_readlane_b32 s93, v40, 41
+; SDAG-NEXT: v_readlane_b32 s92, v40, 40
+; SDAG-NEXT: v_readlane_b32 s91, v40, 39
+; SDAG-NEXT: v_readlane_b32 s90, v40, 38
+; SDAG-NEXT: v_readlane_b32 s89, v40, 37
+; SDAG-NEXT: v_readlane_b32 s88, v40, 36
+; SDAG-NEXT: v_readlane_b32 s79, v40, 35
+; SDAG-NEXT: v_readlane_b32 s78, v40, 34
+; SDAG-NEXT: v_readlane_b32 s77, v40, 33
+; SDAG-NEXT: v_readlane_b32 s76, v40, 32
+; SDAG-NEXT: v_readlane_b32 s75, v40, 31
+; SDAG-NEXT: v_readlane_b32 s74, v40, 30
+; SDAG-NEXT: v_readlane_b32 s73, v40, 29
+; SDAG-NEXT: v_readlane_b32 s72, v40, 28
; SDAG-NEXT: v_readlane_b32 s31, v40, 27
; SDAG-NEXT: v_readlane_b32 s30, v40, 26
; SDAG-NEXT: v_readlane_b32 s29, v40, 25
@@ -150,43 +150,43 @@ define amdgpu_gfx void @gfx_func() {
; GISEL-NEXT: v_writelane_b32 v40, s29, 25
; GISEL-NEXT: v_writelane_b32 v40, s30, 26
; GISEL-NEXT: v_writelane_b32 v40, s31, 27
-; GISEL-NEXT: v_writelane_b32 v40, s70, 28
-; GISEL-NEXT: v_writelane_b32 v40, s71, 29
-; GISEL-NEXT: v_writelane_b32 v40, s72, 30
-; GISEL-NEXT: v_writelane_b32 v40, s73, 31
-; GISEL-NEXT: v_writelane_b32 v40, s74, 32
-; GISEL-NEXT: v_writelane_b32 v40, s75, 33
-; GISEL-NEXT: v_writelane_b32 v40, s76, 34
-; GISEL-NEXT: v_writelane_b32 v40, s77, 35
-; GISEL-NEXT: v_writelane_b32 v40, s86, 36
-; GISEL-NEXT: v_writelane_b32 v40, s87, 37
-; GISEL-NEXT: v_writelane_b32 v40, s88, 38
-; GISEL-NEXT: v_writelane_b32 v40, s89, 39
-; GISEL-NEXT: v_writelane_b32 v40, s90, 40
-; GISEL-NEXT: v_writelane_b32 v40, s91, 41
-; GISEL-NEXT: v_writelane_b32 v40, s92, 42
+; GISEL-NEXT: v_writelane_b32 v40, s72, 28
+; GISEL-NEXT: v_writelane_b32 v40, s73, 29
+; GISEL-NEXT: v_writelane_b32 v40, s74, 30
+; GISEL-NEXT: v_writelane_b32 v40, s75, 31
+; GISEL-NEXT: v_writelane_b32 v40, s76, 32
+; GISEL-NEXT: v_writelane_b32 v40, s77, 33
+; GISEL-NEXT: v_writelane_b32 v40, s78, 34
+; GISEL-NEXT: v_writelane_b32 v40, s79, 35
+; GISEL-NEXT: v_writelane_b32 v40, s88, 36
+; GISEL-NEXT: v_writelane_b32 v40, s89, 37
+; GISEL-NEXT: v_writelane_b32 v40, s90, 38
+; GISEL-NEXT: v_writelane_b32 v40, s91, 39
+; GISEL-NEXT: v_writelane_b32 v40, s92, 40
+; GISEL-NEXT: v_writelane_b32 v40, s93, 41
+; GISEL-NEXT: v_writelane_b32 v40, s94, 42
; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo
; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi
; GISEL-NEXT: s_mov_b64 s[8:9], 0
; GISEL-NEXT: s_addk_i32 s32, 0x400
-; GISEL-NEXT: v_writelane_b32 v40, s93, 43
+; GISEL-NEXT: v_writelane_b32 v40, s95, 43
; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35]
-; GISEL-NEXT: v_readlane_b32 s93, v40, 43
-; GISEL-NEXT: v_readlane_b32 s92, v40, 42
-; GISEL-NEXT: v_readlane_b32 s91, v40, 41
-; GISEL-NEXT: v_readlane_b32 s90, v40, 40
-; GISEL-NEXT: v_readlane_b32 s89, v40, 39
-; GISEL-NEXT: v_readlane_b32 s88, v40, 38
-; GISEL-NEXT: v_readlane_b32 s87, v40, 37
-; GISEL-NEXT: v_readlane_b32 s86, v40, 36
-; GISEL-NEXT: v_readlane_b32 s77, v40, 35
-; GISEL-NEXT: v_readlane_b32 s76, v40, 34
-; GISEL-NEXT: v_readlane_b32 s75, v40, 33
-; GISEL-NEXT: v_readlane_b32 s74, v40, 32
-; GISEL-NEXT: v_readlane_b32 s73, v40, 31
-; GISEL-NEXT: v_readlane_b32 s72, v40, 30
-; GISEL-NEXT: v_readlane_b32 s71, v40, 29
-; GISEL-NEXT: v_readlane_b32 s70, v40, 28
+; GISEL-NEXT: v_readlane_b32 s95, v40, 43
+; GISEL-NEXT: v_readlane_b32 s94, v40, 42
+; GISEL-NEXT: v_readlane_b32 s93, v40, 41
+; GISEL-NEXT: v_readlane_b32 s92, v40, 40
+; GISEL-NEXT: v_readlane_b32 s91, v40, 39
+; GISEL-NEXT: v_readlane_b32 s90, v40, 38
+; GISEL-NEXT: v_readlane_b32 s89, v40, 37
+; GISEL-NEXT: v_readlane_b32 s88, v40, 36
+; GISEL-NEXT: v_readlane_b32 s79, v40, 35
+; GISEL-NEXT: v_readlane_b32 s78, v40, 34
+; GISEL-NEXT: v_readlane_b32 s77, v40, 33
+; GISEL-NEXT: v_readlane_b32 s76, v40, 32
+; GISEL-NEXT: v_readlane_b32 s75, v40, 31
+; GISEL-NEXT: v_readlane_b32 s74, v40, 30
+; GISEL-NEXT: v_readlane_b32 s73, v40, 29
+; GISEL-NEXT: v_readlane_b32 s72, v40, 28
; GISEL-NEXT: v_readlane_b32 s31, v40, 27
; GISEL-NEXT: v_readlane_b32 s30, v40, 26
; GISEL-NEXT: v_readlane_b32 s29, v40, 25
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index ef230e4b877b0..2322b29abaa10 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -9091,8 +9091,8 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s35, 3
; GFX9-NEXT: v_writelane_b32 v40, s36, 4
; GFX9-NEXT: v_writelane_b32 v40, s37, 5
-; GFX9-NEXT: v_writelane_b32 v40, s46, 6
-; GFX9-NEXT: v_writelane_b32 v40, s47, 7
+; GFX9-NEXT: v_writelane_b32 v40, s38, 6
+; GFX9-NEXT: v_writelane_b32 v40, s39, 7
; GFX9-NEXT: v_writelane_b32 v40, s48, 8
; GFX9-NEXT: v_writelane_b32 v40, s49, 9
; GFX9-NEXT: v_writelane_b32 v40, s50, 10
@@ -9100,25 +9100,25 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX9-NEXT: v_writelane_b32 v40, s52, 12
; GFX9-NEXT: v_writelane_b32 v40, s53, 13
; GFX9-NEXT: s_addk_i32 s32, 0x800
-; GFX9-NEXT: v_writelane_b32 v40, s62, 14
+; GFX9-NEXT: v_writelane_b32 v40, s54, 14
; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi
; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo
-; GFX9-NEXT: v_writelane_b32 v40, s63, 15
+; GFX9-NEXT: v_writelane_b32 v40, s55, 15
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
; GFX9-NEXT: s_waitcnt vmcnt(2)
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s63, v40, 15
-; GFX9-NEXT: v_readlane_b32 s62, v40, 14
+; GFX9-NEXT: v_readlane_b32 s55, v40, 15
+; GFX9-NEXT: v_readlane_b32 s54, v40, 14
; GFX9-NEXT: v_readlane_b32 s53, v40, 13
; GFX9-NEXT: v_readlane_b32 s52, v40, 12
; GFX9-NEXT: v_readlane_b32 s51, v40, 11
; GFX9-NEXT: v_readlane_b32 s50, v40, 10
; GFX9-NEXT: v_readlane_b32 s49, v40, 9
; GFX9-NEXT: v_readlane_b32 s48, v40, 8
-; GFX9-NEXT: v_readlane_b32 s47, v40, 7
-; GFX9-NEXT: v_readlane_b32 s46, v40, 6
+; GFX9-NEXT: v_readlane_b32 s39, v40, 7
+; GFX9-NEXT: v_readlane_b32 s38, v40, 6
; GFX9-NEXT: v_readlane_b32 s37, v40, 5
; GFX9-NEXT: v_readlane_b32 s36, v40, 4
; GFX9-NEXT: v_readlane_b32 s35, v40, 3
@@ -9159,27 +9159,27 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX10-NEXT: v_writelane_b32 v40, s35, 3
; GFX10-NEXT: v_writelane_b32 v40, s36, 4
; GFX10-NEXT: v_writelane_b32 v40, s37, 5
-; GFX10-NEXT: v_writelane_b32 v40, s46, 6
-; GFX10-NEXT: v_writelane_b32 v40, s47, 7
+; GFX10-NEXT: v_writelane_b32 v40, s38, 6
+; GFX10-NEXT: v_writelane_b32 v40, s39, 7
; GFX10-NEXT: v_writelane_b32 v40, s48, 8
; GFX10-NEXT: v_writelane_b32 v40, s49, 9
; GFX10-NEXT: v_writelane_b32 v40, s50, 10
; GFX10-NEXT: v_writelane_b32 v40, s51, 11
; GFX10-NEXT: v_writelane_b32 v40, s52, 12
; GFX10-NEXT: v_writelane_b32 v40, s53, 13
-; GFX10-NEXT: v_writelane_b32 v40, s62, 14
-; GFX10-NEXT: v_writelane_b32 v40, s63, 15
+; GFX10-NEXT: v_writelane_b32 v40, s54, 14
+; GFX10-NEXT: v_writelane_b32 v40, s55, 15
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s63, v40, 15
-; GFX10-NEXT: v_readlane_b32 s62, v40, 14
+; GFX10-NEXT: v_readlane_b32 s55, v40, 15
+; GFX10-NEXT: v_readlane_b32 s54, v40, 14
; GFX10-NEXT: v_readlane_b32 s53, v40, 13
; GFX10-NEXT: v_readlane_b32 s52, v40, 12
; GFX10-NEXT: v_readlane_b32 s51, v40, 11
; GFX10-NEXT: v_readlane_b32 s50, v40, 10
; GFX10-NEXT: v_readlane_b32 s49, v40, 9
; GFX10-NEXT: v_readlane_b32 s48, v40, 8
-; GFX10-NEXT: v_readlane_b32 s47, v40, 7
-; GFX10-NEXT: v_readlane_b32 s46, v40, 6
+; GFX10-NEXT: v_readlane_b32 s39, v40, 7
+; GFX10-NEXT: v_readlane_b32 s38, v40, 6
; GFX10-NEXT: v_readlane_b32 s37, v40, 5
; GFX10-NEXT: v_readlane_b32 s36, v40, 4
; GFX10-NEXT: v_readlane_b32 s35, v40, 3
@@ -9215,29 +9215,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX11-NEXT: v_writelane_b32 v40, s35, 3
; GFX11-NEXT: v_writelane_b32 v40, s36, 4
; GFX11-NEXT: v_writelane_b32 v40, s37, 5
-; GFX11-NEXT: v_writelane_b32 v40, s46, 6
-; GFX11-NEXT: v_writelane_b32 v40, s47, 7
+; GFX11-NEXT: v_writelane_b32 v40, s38, 6
+; GFX11-NEXT: v_writelane_b32 v40, s39, 7
; GFX11-NEXT: v_writelane_b32 v40, s48, 8
; GFX11-NEXT: v_writelane_b32 v40, s49, 9
; GFX11-NEXT: v_writelane_b32 v40, s50, 10
; GFX11-NEXT: v_writelane_b32 v40, s51, 11
; GFX11-NEXT: v_writelane_b32 v40, s52, 12
; GFX11-NEXT: v_writelane_b32 v40, s53, 13
-; GFX11-NEXT: v_writelane_b32 v40, s62, 14
-; GFX11-NEXT: v_writelane_b32 v40, s63, 15
+; GFX11-NEXT: v_writelane_b32 v40, s54, 14
+; GFX11-NEXT: v_writelane_b32 v40, s55, 15
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: v_readlane_b32 s63, v40, 15
-; GFX11-NEXT: v_readlane_b32 s62, v40, 14
+; GFX11-NEXT: v_readlane_b32 s55, v40, 15
+; GFX11-NEXT: v_readlane_b32 s54, v40, 14
; GFX11-NEXT: v_readlane_b32 s53, v40, 13
; GFX11-NEXT: v_readlane_b32 s52, v40, 12
; GFX11-NEXT: v_readlane_b32 s51, v40, 11
; GFX11-NEXT: v_readlane_b32 s50, v40, 10
; GFX11-NEXT: v_readlane_b32 s49, v40, 9
; GFX11-NEXT: v_readlane_b32 s48, v40, 8
-; GFX11-NEXT: v_readlane_b32 s47, v40, 7
-; GFX11-NEXT: v_readlane_b32 s46, v40, 6
+; GFX11-NEXT: v_readlane_b32 s39, v40, 7
+; GFX11-NEXT: v_readlane_b32 s38, v40, 6
; GFX11-NEXT: v_readlane_b32 s37, v40, 5
; GFX11-NEXT: v_readlane_b32 s36, v40, 4
; GFX11-NEXT: v_readlane_b32 s35, v40, 3
@@ -9273,29 +9273,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 6
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 8
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 9
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 10
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 11
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 13
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 14
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 15
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 14
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 15
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1)
; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 15
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 14
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 15
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 14
; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 13
; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 12
; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 11
; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 10
; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 9
; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 8
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 7
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6
; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5
; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4
; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 3c85914536f28..4f5c46d5f424f 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -365,12 +365,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -388,8 +388,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -437,12 +437,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -458,9 +458,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -504,13 +504,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -526,8 +526,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -571,13 +571,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -593,8 +593,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -735,19 +735,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -760,11 +760,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -773,7 +773,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -785,12 +785,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -806,9 +806,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -867,13 +867,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -889,8 +889,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -944,13 +944,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -966,8 +966,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1585,12 +1585,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1608,8 +1608,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1657,12 +1657,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1678,9 +1678,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1724,13 +1724,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1746,8 +1746,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1791,13 +1791,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1813,8 +1813,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1955,19 +1955,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -1980,11 +1980,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1993,7 +1993,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2005,12 +2005,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2026,9 +2026,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2087,13 +2087,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2109,8 +2109,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2164,13 +2164,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2186,8 +2186,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2865,12 +2865,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -2888,8 +2888,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -2937,12 +2937,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -2958,9 +2958,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3004,13 +3004,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -3026,8 +3026,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3071,13 +3071,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -3093,8 +3093,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3235,19 +3235,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -3260,11 +3260,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -3273,7 +3273,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3285,12 +3285,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -3306,9 +3306,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3367,13 +3367,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3444,13 +3444,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -3466,8 +3466,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3641,12 +3641,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -3664,8 +3664,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -3713,12 +3713,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -3734,9 +3734,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3780,13 +3780,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -3802,8 +3802,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3847,13 +3847,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -3869,8 +3869,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4011,19 +4011,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -4036,11 +4036,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -4049,7 +4049,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -4061,12 +4061,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -4082,9 +4082,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4143,13 +4143,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -4165,8 +4165,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4220,13 +4220,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -4242,8 +4242,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4920,12 +4920,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -4943,8 +4943,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -4992,12 +4992,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5013,9 +5013,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5059,13 +5059,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5081,8 +5081,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5126,13 +5126,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -5148,8 +5148,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5316,19 +5316,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -5341,11 +5341,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -5354,7 +5354,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -5366,12 +5366,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -5387,9 +5387,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5448,13 +5448,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -5470,8 +5470,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5525,13 +5525,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -5547,8 +5547,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5753,7 +5753,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
@@ -5769,7 +5769,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
@@ -5803,7 +5803,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -5820,8 +5820,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-NEXT: .LBB9_3:
; GFX7LESS-NEXT: s_endpgm
@@ -5838,7 +5838,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
; GFX9-NEXT: s_addc_u32 s65, s65, 0
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
@@ -5859,7 +5859,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v3, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -5878,7 +5878,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -5899,8 +5899,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-NEXT: .LBB9_3:
; GFX9-NEXT: s_endpgm
@@ -5930,8 +5930,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -5962,7 +5962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -5979,8 +5979,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-NEXT: .LBB9_3:
; GFX1064-NEXT: s_endpgm
@@ -5998,7 +5998,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
@@ -6010,7 +6010,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6041,7 +6041,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -6058,8 +6058,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-NEXT: .LBB9_3:
; GFX1032-NEXT: s_endpgm
@@ -6085,8 +6085,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6116,7 +6116,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -6130,8 +6130,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-NEXT: .LBB9_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6143,7 +6143,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -6157,7 +6157,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6183,7 +6183,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -6196,8 +6196,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-NEXT: .LBB9_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6212,7 +6212,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
@@ -6228,7 +6228,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
@@ -6262,7 +6262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -6279,8 +6279,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-DPP-NEXT: .LBB9_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -6297,7 +6297,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
@@ -6318,7 +6318,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -6337,7 +6337,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -6358,8 +6358,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-DPP-NEXT: .LBB9_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -6389,8 +6389,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6421,7 +6421,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -6438,8 +6438,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-DPP-NEXT: .LBB9_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -6457,7 +6457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
@@ -6469,7 +6469,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6500,7 +6500,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -6517,8 +6517,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-DPP-NEXT: .LBB9_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -6544,8 +6544,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6575,7 +6575,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -6589,8 +6589,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-DPP-NEXT: .LBB9_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6602,7 +6602,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -6616,7 +6616,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6642,7 +6642,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -6655,8 +6655,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-DPP-NEXT: .LBB9_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6669,18 +6669,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -6699,8 +6699,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -6725,21 +6725,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -6753,25 +6753,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4
; GFX7LESS-NEXT: .LBB10_5:
; GFX7LESS-NEXT: s_endpgm
@@ -6798,7 +6798,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -6835,7 +6835,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -6854,7 +6854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -6875,8 +6875,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB10_4
; GFX9-NEXT: .LBB10_5:
; GFX9-NEXT: s_endpgm
@@ -6903,7 +6903,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -6940,7 +6940,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: ; %bb.3:
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -6964,7 +6964,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -6981,8 +6981,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_4
; GFX1064-NEXT: .LBB10_5:
; GFX1064-NEXT: s_endpgm
@@ -7009,7 +7009,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7038,7 +7038,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -7069,7 +7069,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -7086,8 +7086,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB10_4
; GFX1032-NEXT: .LBB10_5:
; GFX1032-NEXT: s_endpgm
@@ -7115,7 +7115,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -7145,7 +7145,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: ; %bb.3:
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7169,7 +7169,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -7183,8 +7183,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_4
; GFX1164-NEXT: .LBB10_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7213,7 +7213,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -7233,7 +7233,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -7262,7 +7262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -7275,8 +7275,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB10_4
; GFX1132-NEXT: .LBB10_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7285,22 +7285,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -7318,23 +7318,23 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -7348,37 +7348,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -7393,17 +7393,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7457,10 +7457,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -7471,31 +7471,31 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -7526,7 +7526,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7582,7 +7582,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7606,7 +7606,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -7623,8 +7623,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -7651,7 +7651,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7696,7 +7696,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
@@ -7725,7 +7725,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -7742,8 +7742,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -7771,7 +7771,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -7826,7 +7826,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7850,7 +7850,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -7864,8 +7864,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7894,7 +7894,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -7936,7 +7936,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
@@ -7962,7 +7962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -7975,8 +7975,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8508,12 +8508,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -8531,8 +8531,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -8585,12 +8585,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -8606,9 +8606,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8655,13 +8655,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -8677,8 +8677,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8725,13 +8725,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -8747,8 +8747,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8922,19 +8922,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -8947,11 +8947,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -8962,7 +8962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -8975,12 +8975,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -8996,9 +8996,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9074,13 +9074,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -9096,8 +9096,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9163,13 +9163,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -9185,8 +9185,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9941,12 +9941,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -9964,8 +9964,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -10018,12 +10018,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -10039,9 +10039,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10088,13 +10088,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -10110,8 +10110,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10158,13 +10158,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -10180,8 +10180,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10355,19 +10355,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -10380,11 +10380,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -10395,7 +10395,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -10408,12 +10408,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -10429,9 +10429,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10507,13 +10507,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -10529,8 +10529,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10596,13 +10596,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -10618,8 +10618,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10856,12 +10856,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -10879,8 +10879,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -10933,12 +10933,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -10954,9 +10954,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11003,13 +11003,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -11025,8 +11025,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11073,13 +11073,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -11095,8 +11095,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11270,19 +11270,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -11295,11 +11295,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -11310,7 +11310,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -11323,12 +11323,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -11344,9 +11344,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11422,13 +11422,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -11444,8 +11444,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11511,13 +11511,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -11533,8 +11533,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11795,11 +11795,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -11829,7 +11829,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -11846,8 +11846,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-NEXT: .LBB16_3:
; GFX7LESS-NEXT: s_endpgm
@@ -11884,10 +11884,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -11906,7 +11906,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -11927,8 +11927,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-NEXT: .LBB16_3:
; GFX9-NEXT: s_endpgm
@@ -11962,8 +11962,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -11990,7 +11990,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -12007,8 +12007,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-NEXT: .LBB16_3:
; GFX1064-NEXT: s_endpgm
@@ -12024,7 +12024,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12042,7 +12042,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -12069,7 +12069,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -12086,8 +12086,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-NEXT: .LBB16_3:
; GFX1032-NEXT: s_endpgm
@@ -12121,8 +12121,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -12150,7 +12150,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -12164,8 +12164,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-NEXT: .LBB16_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12178,7 +12178,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_clause 0x1
; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12199,7 +12199,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -12223,7 +12223,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -12236,8 +12236,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-NEXT: .LBB16_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12270,11 +12270,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -12304,7 +12304,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -12321,8 +12321,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-DPP-NEXT: .LBB16_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -12359,10 +12359,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -12381,7 +12381,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -12402,8 +12402,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-DPP-NEXT: .LBB16_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -12437,8 +12437,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12465,7 +12465,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -12482,8 +12482,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-DPP-NEXT: .LBB16_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -12499,7 +12499,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12517,7 +12517,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12544,7 +12544,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -12561,8 +12561,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-DPP-NEXT: .LBB16_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -12596,8 +12596,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12625,7 +12625,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -12639,8 +12639,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-DPP-NEXT: .LBB16_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12653,7 +12653,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_clause 0x1
; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12674,7 +12674,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12698,7 +12698,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -12711,8 +12711,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-DPP-NEXT: .LBB16_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12725,18 +12725,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -12755,8 +12755,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -12781,21 +12781,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -12809,25 +12809,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4
; GFX7LESS-NEXT: .LBB17_5:
; GFX7LESS-NEXT: s_endpgm
@@ -12854,7 +12854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -12891,7 +12891,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -12910,7 +12910,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -12931,8 +12931,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB17_4
; GFX9-NEXT: .LBB17_5:
; GFX9-NEXT: s_endpgm
@@ -12959,7 +12959,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -12996,7 +12996,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: ; %bb.3:
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13020,7 +13020,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -13037,8 +13037,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB17_4
; GFX1064-NEXT: .LBB17_5:
; GFX1064-NEXT: s_endpgm
@@ -13065,7 +13065,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13094,7 +13094,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -13125,7 +13125,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -13142,8 +13142,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB17_4
; GFX1032-NEXT: .LBB17_5:
; GFX1032-NEXT: s_endpgm
@@ -13171,7 +13171,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -13201,7 +13201,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: ; %bb.3:
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -13225,7 +13225,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -13239,8 +13239,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB17_4
; GFX1164-NEXT: .LBB17_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13269,7 +13269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -13289,7 +13289,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -13318,7 +13318,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -13331,8 +13331,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB17_4
; GFX1132-NEXT: .LBB17_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13341,22 +13341,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -13374,23 +13374,23 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -13404,37 +13404,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -13449,17 +13449,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13513,10 +13513,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -13527,31 +13527,31 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -13582,7 +13582,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13638,7 +13638,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
@@ -13662,7 +13662,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -13679,8 +13679,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1064-DPP-NEXT: .LBB17_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -13707,7 +13707,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13752,7 +13752,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3
@@ -13781,7 +13781,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -13798,8 +13798,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1032-DPP-NEXT: .LBB17_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -13827,7 +13827,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -13882,7 +13882,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -13906,7 +13906,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -13920,8 +13920,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1164-DPP-NEXT: .LBB17_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13950,7 +13950,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -13992,7 +13992,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
@@ -14018,7 +14018,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -14031,8 +14031,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1132-DPP-NEXT: .LBB17_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index cc9c310e5c059..e1ba4a2b0bf2a 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
@@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
@@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3153,11 +3153,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3189,7 +3189,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -3205,8 +3205,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-NEXT: .LBB6_3:
; GFX7LESS-NEXT: s_endpgm
@@ -3237,10 +3237,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3258,7 +3258,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -3281,8 +3281,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-NEXT: .LBB6_3:
; GFX9-NEXT: s_endpgm
@@ -3312,8 +3312,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -3340,7 +3340,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -3358,8 +3358,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-NEXT: .LBB6_3:
; GFX1064-NEXT: s_endpgm
@@ -3375,7 +3375,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
@@ -3389,7 +3389,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -3416,7 +3416,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -3434,8 +3434,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-NEXT: .LBB6_3:
; GFX1032-NEXT: s_endpgm
@@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -3484,7 +3484,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-NEXT: .LBB6_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3514,7 +3514,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -3526,7 +3526,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -3548,7 +3548,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-NEXT: .LBB6_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3593,11 +3593,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3629,7 +3629,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -3645,8 +3645,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-DPP-NEXT: .LBB6_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -3677,10 +3677,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3698,7 +3698,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -3721,8 +3721,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-DPP-NEXT: .LBB6_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -3752,8 +3752,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3780,7 +3780,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -3798,8 +3798,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-DPP-NEXT: .LBB6_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -3815,7 +3815,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
@@ -3829,7 +3829,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3856,7 +3856,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -3874,8 +3874,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-DPP-NEXT: .LBB6_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -3898,8 +3898,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3924,7 +3924,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-DPP-NEXT: .LBB6_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3954,7 +3954,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -3966,7 +3966,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3988,7 +3988,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -4005,8 +4005,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-DPP-NEXT: .LBB6_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4019,18 +4019,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -4049,8 +4049,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42]
@@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -4108,24 +4108,24 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4
; GFX7LESS-NEXT: .LBB7_5:
; GFX7LESS-NEXT: s_endpgm
@@ -4152,7 +4152,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4192,7 +4192,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4210,7 +4210,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -4233,8 +4233,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB7_4
; GFX9-NEXT: .LBB7_5:
; GFX9-NEXT: s_endpgm
@@ -4261,7 +4261,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4301,7 +4301,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4325,7 +4325,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -4343,8 +4343,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB7_4
; GFX1064-NEXT: .LBB7_5:
; GFX1064-NEXT: s_endpgm
@@ -4371,7 +4371,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4402,7 +4402,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -4434,7 +4434,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -4452,8 +4452,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB7_4
; GFX1032-NEXT: .LBB7_5:
; GFX1032-NEXT: s_endpgm
@@ -4481,7 +4481,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -4514,7 +4514,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -4535,7 +4535,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB7_4
; GFX1164-NEXT: .LBB7_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -4606,7 +4606,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -4635,7 +4635,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB7_4
; GFX1132-NEXT: .LBB7_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -4696,27 +4696,27 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -4726,36 +4726,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4841,10 +4841,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
@@ -4856,32 +4856,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -4912,7 +4912,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4976,7 +4976,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
@@ -4999,7 +4999,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -5019,8 +5019,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1064-DPP-NEXT: .LBB7_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -5047,7 +5047,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -5098,7 +5098,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
@@ -5128,7 +5128,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -5146,8 +5146,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1032-DPP-NEXT: .LBB7_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -5175,7 +5175,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5241,7 +5241,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -5262,7 +5262,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1164-DPP-NEXT: .LBB7_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5356,7 +5356,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
@@ -5382,7 +5382,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -5399,8 +5399,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1132-DPP-NEXT: .LBB7_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start
@@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6701,11 +6701,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -6737,7 +6737,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -6753,8 +6753,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-NEXT: .LBB10_3:
; GFX7LESS-NEXT: s_endpgm
@@ -6785,10 +6785,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -6806,7 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -6829,8 +6829,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-NEXT: .LBB10_3:
; GFX9-NEXT: s_endpgm
@@ -6860,8 +6860,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -6888,7 +6888,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -6906,8 +6906,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-NEXT: .LBB10_3:
; GFX1064-NEXT: s_endpgm
@@ -6923,7 +6923,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
@@ -6937,7 +6937,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -6964,7 +6964,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-NEXT: .LBB10_3:
; GFX1032-NEXT: s_endpgm
@@ -7006,8 +7006,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -7032,7 +7032,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-NEXT: .LBB10_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7062,7 +7062,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -7074,7 +7074,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -7096,7 +7096,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -7113,8 +7113,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-NEXT: .LBB10_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7141,11 +7141,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -7177,7 +7177,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -7193,8 +7193,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-DPP-NEXT: .LBB10_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -7225,10 +7225,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7246,7 +7246,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -7269,8 +7269,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-DPP-NEXT: .LBB10_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -7300,8 +7300,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7328,7 +7328,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -7346,8 +7346,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -7363,7 +7363,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
@@ -7377,7 +7377,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7404,7 +7404,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -7422,8 +7422,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -7446,8 +7446,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7472,7 +7472,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7502,7 +7502,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -7514,7 +7514,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7536,7 +7536,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -7553,8 +7553,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7567,18 +7567,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -7597,8 +7597,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42]
@@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -7656,24 +7656,24 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4
; GFX7LESS-NEXT: .LBB11_5:
; GFX7LESS-NEXT: s_endpgm
@@ -7700,7 +7700,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7740,7 +7740,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -7758,7 +7758,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -7781,8 +7781,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB11_4
; GFX9-NEXT: .LBB11_5:
; GFX9-NEXT: s_endpgm
@@ -7809,7 +7809,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7849,7 +7849,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -7873,7 +7873,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -7891,8 +7891,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB11_4
; GFX1064-NEXT: .LBB11_5:
; GFX1064-NEXT: s_endpgm
@@ -7919,7 +7919,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7950,7 +7950,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -7982,7 +7982,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -8000,8 +8000,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB11_4
; GFX1032-NEXT: .LBB11_5:
; GFX1032-NEXT: s_endpgm
@@ -8029,7 +8029,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -8062,7 +8062,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8083,7 +8083,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB11_4
; GFX1164-NEXT: .LBB11_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -8154,7 +8154,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -8183,7 +8183,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB11_4
; GFX1132-NEXT: .LBB11_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8244,27 +8244,27 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -8274,36 +8274,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8389,10 +8389,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
@@ -8404,32 +8404,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -8460,7 +8460,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -8524,7 +8524,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
@@ -8547,7 +8547,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -8567,8 +8567,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1064-DPP-NEXT: .LBB11_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -8595,7 +8595,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -8646,7 +8646,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
@@ -8676,7 +8676,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -8694,8 +8694,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1032-DPP-NEXT: .LBB11_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -8723,7 +8723,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8789,7 +8789,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8810,7 +8810,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1164-DPP-NEXT: .LBB11_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8904,7 +8904,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
@@ -8930,7 +8930,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -8947,8 +8947,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1132-DPP-NEXT: .LBB11_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 81a16df17c728..6b1d5253e178f 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
@@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
@@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3153,11 +3153,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3189,7 +3189,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -3205,8 +3205,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-NEXT: .LBB6_3:
; GFX7LESS-NEXT: s_endpgm
@@ -3237,10 +3237,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3258,7 +3258,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -3281,8 +3281,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-NEXT: .LBB6_3:
; GFX9-NEXT: s_endpgm
@@ -3312,8 +3312,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -3340,7 +3340,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -3358,8 +3358,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-NEXT: .LBB6_3:
; GFX1064-NEXT: s_endpgm
@@ -3375,7 +3375,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB6_3
@@ -3389,7 +3389,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -3416,7 +3416,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -3434,8 +3434,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-NEXT: .LBB6_3:
; GFX1032-NEXT: s_endpgm
@@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -3484,7 +3484,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-NEXT: .LBB6_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3514,7 +3514,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -3526,7 +3526,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -3548,7 +3548,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-NEXT: .LBB6_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3593,11 +3593,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -3629,7 +3629,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -3645,8 +3645,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX7LESS-DPP-NEXT: .LBB6_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -3677,10 +3677,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start
@@ -3698,7 +3698,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -3721,8 +3721,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX9-DPP-NEXT: .LBB6_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -3752,8 +3752,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3780,7 +3780,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -3798,8 +3798,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1064-DPP-NEXT: .LBB6_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -3815,7 +3815,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3
@@ -3829,7 +3829,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3856,7 +3856,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -3874,8 +3874,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1032-DPP-NEXT: .LBB6_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -3898,8 +3898,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3924,7 +3924,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1164-DPP-NEXT: .LBB6_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -3954,7 +3954,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -3966,7 +3966,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -3988,7 +3988,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -4005,8 +4005,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2
; GFX1132-DPP-NEXT: .LBB6_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4019,18 +4019,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -4049,8 +4049,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42]
@@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -4108,24 +4108,24 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4
; GFX7LESS-NEXT: .LBB7_5:
; GFX7LESS-NEXT: s_endpgm
@@ -4152,7 +4152,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4192,7 +4192,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4210,7 +4210,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -4233,8 +4233,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB7_4
; GFX9-NEXT: .LBB7_5:
; GFX9-NEXT: s_endpgm
@@ -4261,7 +4261,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4301,7 +4301,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start
@@ -4325,7 +4325,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -4343,8 +4343,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB7_4
; GFX1064-NEXT: .LBB7_5:
; GFX1064-NEXT: s_endpgm
@@ -4371,7 +4371,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4402,7 +4402,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -4434,7 +4434,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -4452,8 +4452,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB7_4
; GFX1032-NEXT: .LBB7_5:
; GFX1032-NEXT: s_endpgm
@@ -4481,7 +4481,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -4514,7 +4514,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -4535,7 +4535,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB7_4
; GFX1164-NEXT: .LBB7_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -4606,7 +4606,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -4635,7 +4635,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB7_4
; GFX1132-NEXT: .LBB7_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -4696,27 +4696,27 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -4726,36 +4726,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4841,10 +4841,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
@@ -4856,32 +4856,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -4912,7 +4912,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -4976,7 +4976,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start
@@ -4999,7 +4999,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -5019,8 +5019,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1064-DPP-NEXT: .LBB7_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -5047,7 +5047,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -5098,7 +5098,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3
@@ -5128,7 +5128,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -5146,8 +5146,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1032-DPP-NEXT: .LBB7_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -5175,7 +5175,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5241,7 +5241,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -5262,7 +5262,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1164-DPP-NEXT: .LBB7_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5356,7 +5356,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
@@ -5382,7 +5382,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -5399,8 +5399,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2
; GFX1132-DPP-NEXT: .LBB7_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start
@@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6701,11 +6701,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -6737,7 +6737,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -6753,8 +6753,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-NEXT: .LBB10_3:
; GFX7LESS-NEXT: s_endpgm
@@ -6785,10 +6785,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -6806,7 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -6829,8 +6829,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-NEXT: .LBB10_3:
; GFX9-NEXT: s_endpgm
@@ -6860,8 +6860,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -6888,7 +6888,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -6906,8 +6906,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-NEXT: .LBB10_3:
; GFX1064-NEXT: s_endpgm
@@ -6923,7 +6923,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_add_u32 s64, s64, s11
; GFX1032-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB10_3
@@ -6937,7 +6937,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -6964,7 +6964,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-NEXT: .LBB10_3:
; GFX1032-NEXT: s_endpgm
@@ -7006,8 +7006,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -7032,7 +7032,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-NEXT: .LBB10_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7062,7 +7062,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -7074,7 +7074,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -7096,7 +7096,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -7113,8 +7113,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-NEXT: .LBB10_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7141,11 +7141,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -7177,7 +7177,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -7193,8 +7193,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX7LESS-DPP-NEXT: .LBB10_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -7225,10 +7225,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7246,7 +7246,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -7269,8 +7269,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX9-DPP-NEXT: .LBB10_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -7300,8 +7300,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7328,7 +7328,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -7346,8 +7346,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -7363,7 +7363,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
@@ -7377,7 +7377,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7404,7 +7404,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -7422,8 +7422,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -7446,8 +7446,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7472,7 +7472,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7502,7 +7502,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -7514,7 +7514,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -7536,7 +7536,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -7553,8 +7553,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7567,18 +7567,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -7597,8 +7597,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42]
@@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8
@@ -7656,24 +7656,24 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4
; GFX7LESS-NEXT: .LBB11_5:
; GFX7LESS-NEXT: s_endpgm
@@ -7700,7 +7700,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7740,7 +7740,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -7758,7 +7758,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -7781,8 +7781,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB11_4
; GFX9-NEXT: .LBB11_5:
; GFX9-NEXT: s_endpgm
@@ -7809,7 +7809,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7849,7 +7849,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53]
; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start
@@ -7873,7 +7873,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -7891,8 +7891,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB11_4
; GFX1064-NEXT: .LBB11_5:
; GFX1064-NEXT: s_endpgm
@@ -7919,7 +7919,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7950,7 +7950,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -7982,7 +7982,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -8000,8 +8000,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB11_4
; GFX1032-NEXT: .LBB11_5:
; GFX1032-NEXT: s_endpgm
@@ -8029,7 +8029,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v2, 0
@@ -8062,7 +8062,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8083,7 +8083,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB11_4
; GFX1164-NEXT: .LBB11_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v2, 0
@@ -8154,7 +8154,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -8183,7 +8183,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: v_mov_b32_e32 v3, s53
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB11_4
; GFX1132-NEXT: .LBB11_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -8244,27 +8244,27 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1]
; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42]
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8
@@ -8274,36 +8274,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8389,10 +8389,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53]
@@ -8404,32 +8404,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange at gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange at gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4]
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -8460,7 +8460,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -8524,7 +8524,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start
@@ -8547,7 +8547,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -8567,8 +8567,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1064-DPP-NEXT: .LBB11_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -8595,7 +8595,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -8646,7 +8646,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3
@@ -8676,7 +8676,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -8694,8 +8694,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1032-DPP-NEXT: .LBB11_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -8723,7 +8723,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8789,7 +8789,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8810,7 +8810,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1164-DPP-NEXT: .LBB11_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8904,7 +8904,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
@@ -8930,7 +8930,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -8947,8 +8947,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2
; GFX1132-DPP-NEXT: .LBB11_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 416ce5a031810..d575605f102b7 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -425,12 +425,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -448,8 +448,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -497,12 +497,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -518,9 +518,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -564,13 +564,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -586,8 +586,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -631,13 +631,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -653,8 +653,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -821,19 +821,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -846,11 +846,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -859,7 +859,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -871,12 +871,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -892,9 +892,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -953,13 +953,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -975,8 +975,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1030,13 +1030,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -1052,8 +1052,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1757,12 +1757,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -1780,8 +1780,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -1829,12 +1829,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -1850,9 +1850,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1896,13 +1896,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -1918,8 +1918,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -1963,13 +1963,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -1985,8 +1985,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2153,19 +2153,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -2178,11 +2178,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2191,7 +2191,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -2203,12 +2203,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -2224,9 +2224,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2285,13 +2285,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -2307,8 +2307,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -2362,13 +2362,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -2384,8 +2384,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3089,12 +3089,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -3161,12 +3161,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -3182,9 +3182,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3228,13 +3228,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -3250,8 +3250,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3295,13 +3295,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3485,19 +3485,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -3510,11 +3510,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -3523,7 +3523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -3535,12 +3535,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -3556,9 +3556,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3617,13 +3617,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -3639,8 +3639,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3694,13 +3694,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -3716,8 +3716,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -3917,12 +3917,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -3940,8 +3940,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -3989,12 +3989,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -4010,9 +4010,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4056,13 +4056,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -4078,8 +4078,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4123,13 +4123,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -4145,8 +4145,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4313,19 +4313,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -4338,11 +4338,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -4351,7 +4351,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -4363,12 +4363,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -4384,9 +4384,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4445,13 +4445,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -4467,8 +4467,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -4522,13 +4522,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -4544,8 +4544,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5248,12 +5248,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -5271,8 +5271,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -5320,12 +5320,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -5341,9 +5341,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5387,13 +5387,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -5409,8 +5409,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5454,13 +5454,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -5476,8 +5476,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5644,19 +5644,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -5669,11 +5669,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -5682,7 +5682,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -5694,12 +5694,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -5715,9 +5715,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5776,13 +5776,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -5798,8 +5798,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -5853,13 +5853,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
;
; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -5875,8 +5875,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -6081,7 +6081,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
@@ -6097,7 +6097,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
@@ -6131,7 +6131,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -6148,8 +6148,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-NEXT: .LBB9_3:
; GFX7LESS-NEXT: s_endpgm
@@ -6166,7 +6166,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-NEXT: s_add_u32 s64, s64, s11
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
; GFX9-NEXT: s_addc_u32 s65, s65, 0
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: s_movk_i32 s32, 0x800
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
@@ -6187,7 +6187,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v3, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -6206,7 +6206,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -6227,8 +6227,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-NEXT: .LBB9_3:
; GFX9-NEXT: s_endpgm
@@ -6258,8 +6258,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-NEXT: s_mov_b32 s51, s8
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6290,7 +6290,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -6307,8 +6307,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-NEXT: .LBB9_3:
; GFX1064-NEXT: s_endpgm
@@ -6326,7 +6326,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_cbranch_execz .LBB9_3
@@ -6338,7 +6338,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-NEXT: s_mov_b32 s51, s8
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6369,7 +6369,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -6386,8 +6386,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-NEXT: .LBB9_3:
; GFX1032-NEXT: s_endpgm
@@ -6413,8 +6413,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6444,7 +6444,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-NEXT: .LBB9_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6471,7 +6471,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -6485,7 +6485,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6511,7 +6511,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -6524,8 +6524,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-NEXT: .LBB9_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6540,7 +6540,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec
; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0
@@ -6556,7 +6556,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
@@ -6590,7 +6590,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -6607,8 +6607,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX7LESS-DPP-NEXT: .LBB9_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -6625,7 +6625,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11
; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3
; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc
@@ -6646,7 +6646,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start
@@ -6665,7 +6665,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12
; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -6686,8 +6686,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX9-DPP-NEXT: .LBB9_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -6717,8 +6717,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: s_mov_b32 s51, s8
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6749,7 +6749,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -6766,8 +6766,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1064-DPP-NEXT: .LBB9_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -6785,7 +6785,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3
@@ -6797,7 +6797,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: s_mov_b32 s51, s8
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0
@@ -6828,7 +6828,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -6845,8 +6845,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1032-DPP-NEXT: .LBB9_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -6872,8 +6872,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6903,7 +6903,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -6917,8 +6917,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1164-DPP-NEXT: .LBB9_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6930,7 +6930,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -6944,7 +6944,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1)
@@ -6970,7 +6970,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -6983,8 +6983,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2
; GFX1132-DPP-NEXT: .LBB9_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -6997,18 +6997,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -7027,8 +7027,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -7053,21 +7053,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -7081,25 +7081,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4
; GFX7LESS-NEXT: .LBB10_5:
; GFX7LESS-NEXT: s_endpgm
@@ -7126,7 +7126,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7163,7 +7163,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -7182,7 +7182,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -7203,8 +7203,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB10_4
; GFX9-NEXT: .LBB10_5:
; GFX9-NEXT: s_endpgm
@@ -7231,7 +7231,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7268,7 +7268,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: ; %bb.3:
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start
@@ -7292,7 +7292,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -7309,8 +7309,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB10_4
; GFX1064-NEXT: .LBB10_5:
; GFX1064-NEXT: s_endpgm
@@ -7337,7 +7337,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7366,7 +7366,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -7397,7 +7397,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -7414,8 +7414,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB10_4
; GFX1032-NEXT: .LBB10_5:
; GFX1032-NEXT: s_endpgm
@@ -7443,7 +7443,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -7473,7 +7473,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: ; %bb.3:
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -7497,7 +7497,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -7511,8 +7511,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB10_4
; GFX1164-NEXT: .LBB10_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7541,7 +7541,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -7561,7 +7561,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -7590,7 +7590,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -7603,8 +7603,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB10_4
; GFX1132-NEXT: .LBB10_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -7613,22 +7613,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -7646,23 +7646,23 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -7676,37 +7676,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -7721,17 +7721,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -7785,10 +7785,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -7799,31 +7799,31 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -7854,7 +7854,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -7910,7 +7910,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start
@@ -7934,7 +7934,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -7951,8 +7951,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1064-DPP-NEXT: .LBB10_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -7979,7 +7979,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -8024,7 +8024,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3
@@ -8053,7 +8053,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -8070,8 +8070,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1032-DPP-NEXT: .LBB10_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -8099,7 +8099,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8154,7 +8154,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -8178,7 +8178,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -8192,8 +8192,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1164-DPP-NEXT: .LBB10_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8222,7 +8222,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8264,7 +8264,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3
@@ -8290,7 +8290,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -8303,8 +8303,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2
; GFX1132-DPP-NEXT: .LBB10_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -8835,12 +8835,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -8858,8 +8858,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -8912,12 +8912,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -8933,9 +8933,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -8982,13 +8982,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -9004,8 +9004,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9052,13 +9052,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -9074,8 +9074,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9249,19 +9249,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -9274,11 +9274,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -9289,7 +9289,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -9302,12 +9302,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -9323,9 +9323,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9401,13 +9401,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -9423,8 +9423,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -9490,13 +9490,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -9512,8 +9512,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10268,12 +10268,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -10291,8 +10291,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -10345,12 +10345,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -10366,9 +10366,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10415,13 +10415,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -10437,8 +10437,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10485,13 +10485,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -10507,8 +10507,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10682,19 +10682,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -10707,11 +10707,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -10722,7 +10722,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -10735,12 +10735,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -10756,9 +10756,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10834,13 +10834,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -10856,8 +10856,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -10923,13 +10923,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -10945,8 +10945,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11183,12 +11183,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s50, -1
-; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s48, s48, s11
-; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0
+; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s38, -1
+; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s36, s36, s11
+; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0
; GFX7LESS-NEXT: s_mov_b32 s14, s10
; GFX7LESS-NEXT: s_mov_b32 s13, s9
; GFX7LESS-NEXT: s_mov_b32 s12, s8
@@ -11206,8 +11206,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -11260,12 +11260,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-NEXT: s_mov_b32 s12, s8
; GFX9-NEXT: s_add_u32 s8, s34, 44
@@ -11281,9 +11281,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11330,13 +11330,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064: ; %bb.0:
-; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-NEXT: s_mov_b32 s50, -1
-; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-NEXT: s_mov_b32 s38, -1
+; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-NEXT: s_add_u32 s36, s36, s11
; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-NEXT: s_mov_b32 s12, s8
; GFX1064-NEXT: s_add_u32 s8, s34, 44
; GFX1064-NEXT: s_mov_b32 s13, s9
@@ -11352,8 +11352,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11400,13 +11400,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032: ; %bb.0:
-; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-NEXT: s_mov_b32 s50, -1
-; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-NEXT: s_mov_b32 s38, -1
+; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-NEXT: s_add_u32 s36, s36, s11
; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-NEXT: s_mov_b32 s12, s8
; GFX1032-NEXT: s_add_u32 s8, s34, 44
; GFX1032-NEXT: s_mov_b32 s13, s9
@@ -11422,8 +11422,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11597,19 +11597,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5]
@@ -11622,11 +11622,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0
; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -11637,7 +11637,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2
-; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc
+; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5]
; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
@@ -11650,12 +11650,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s50, -1
-; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11
-; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s38, -1
+; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11
+; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s12, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44
@@ -11671,9 +11671,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11749,13 +11749,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1064-DPP: ; %bb.0:
-; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1064-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000
-; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1064-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000
+; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1064-DPP-NEXT: s_mov_b32 s12, s8
; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1064-DPP-NEXT: s_mov_b32 s13, s9
@@ -11771,8 +11771,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -11838,13 +11838,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
;
; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp:
; GFX1032-DPP: ; %bb.0:
-; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX1032-DPP-NEXT: s_mov_b32 s50, -1
-; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11
+; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX1032-DPP-NEXT: s_mov_b32 s38, -1
+; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5]
-; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0
+; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0
; GFX1032-DPP-NEXT: s_mov_b32 s12, s8
; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44
; GFX1032-DPP-NEXT: s_mov_b32 s13, s9
@@ -11860,8 +11860,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -12121,11 +12121,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1
@@ -12155,7 +12155,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
@@ -12172,8 +12172,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-NEXT: .LBB16_3:
; GFX7LESS-NEXT: s_endpgm
@@ -12210,10 +12210,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-NEXT: s_mov_b32 s51, s8
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -12232,7 +12232,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -12253,8 +12253,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-NEXT: .LBB16_3:
; GFX9-NEXT: s_endpgm
@@ -12288,8 +12288,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -12316,7 +12316,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -12333,8 +12333,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-NEXT: .LBB16_3:
; GFX1064-NEXT: s_endpgm
@@ -12350,7 +12350,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-NEXT: s_movk_i32 s32, 0x400
; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12368,7 +12368,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -12395,7 +12395,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -12412,8 +12412,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-NEXT: .LBB16_3:
; GFX1032-NEXT: s_endpgm
@@ -12447,8 +12447,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-NEXT: s_mov_b32 s51, s8
; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
@@ -12476,7 +12476,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -12490,8 +12490,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-NEXT: .LBB16_3:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12504,7 +12504,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_clause 0x1
; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -12525,7 +12525,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-NEXT: s_mov_b32 s51, s13
; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
@@ -12549,7 +12549,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -12562,8 +12562,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-NEXT: .LBB16_3:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12596,11 +12596,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1
@@ -12630,7 +12630,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
@@ -12647,8 +12647,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX7LESS-DPP-NEXT: .LBB16_3:
; GFX7LESS-DPP-NEXT: s_endpgm
@@ -12685,10 +12685,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1
-; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start
@@ -12707,7 +12707,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
@@ -12728,8 +12728,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX9-DPP-NEXT: .LBB16_3:
; GFX9-DPP-NEXT: s_endpgm
@@ -12763,8 +12763,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12791,7 +12791,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -12808,8 +12808,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1064-DPP-NEXT: .LBB16_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -12825,7 +12825,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo
; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1]
; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400
; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4]
@@ -12843,7 +12843,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12870,7 +12870,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -12887,8 +12887,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1032-DPP-NEXT: .LBB16_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -12922,8 +12922,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s51, s8
; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -12951,7 +12951,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -12965,8 +12965,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1164-DPP-NEXT: .LBB16_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -12979,7 +12979,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_clause 0x1
; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20
; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16
@@ -13000,7 +13000,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s51, s13
; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
@@ -13024,7 +13024,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -13037,8 +13037,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2
; GFX1132-DPP-NEXT: .LBB16_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13051,18 +13051,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-NEXT: s_mov_b32 s33, s10
; GFX7LESS-NEXT: s_mov_b32 s50, s9
; GFX7LESS-NEXT: s_mov_b32 s51, s8
; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1]
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
@@ -13081,8 +13081,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec
@@ -13107,21 +13107,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5
; GFX7LESS-NEXT: ; %bb.3:
-; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9
-; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9
+; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-NEXT: s_mov_b32 s54, -1
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start
; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42]
-; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -13135,25 +13135,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-NEXT: s_mov_b32 s12, s51
; GFX7LESS-NEXT: s_mov_b32 s13, s50
; GFX7LESS-NEXT: s_mov_b32 s14, s33
; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40
-; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4
; GFX7LESS-NEXT: .LBB17_5:
; GFX7LESS-NEXT: s_endpgm
@@ -13180,7 +13180,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-NEXT: s_mov_b32 s33, s10
; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13217,7 +13217,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
-; GFX9-NEXT: s_mov_b64 s[62:63], 0
+; GFX9-NEXT: s_mov_b64 s[54:55], 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13236,7 +13236,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12
; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s51
; GFX9-NEXT: s_mov_b32 s13, s50
@@ -13257,8 +13257,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX9-NEXT: s_cbranch_execnz .LBB17_4
; GFX9-NEXT: .LBB17_5:
; GFX9-NEXT: s_endpgm
@@ -13285,7 +13285,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-NEXT: s_mov_b32 s33, s10
; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13322,7 +13322,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: ; %bb.3:
; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start
@@ -13346,7 +13346,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-NEXT: s_mov_b32 s12, s51
; GFX1064-NEXT: s_mov_b32 s13, s50
@@ -13363,8 +13363,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-NEXT: s_cbranch_execnz .LBB17_4
; GFX1064-NEXT: .LBB17_5:
; GFX1064-NEXT: s_endpgm
@@ -13391,7 +13391,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-NEXT: s_mov_b32 s33, s10
; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13420,7 +13420,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1032-NEXT: s_mov_b32 s62, 0
+; GFX1032-NEXT: s_mov_b32 s54, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0
@@ -13451,7 +13451,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-NEXT: s_mov_b32 s12, s51
; GFX1032-NEXT: s_mov_b32 s13, s50
@@ -13468,8 +13468,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-NEXT: s_cbranch_execnz .LBB17_4
; GFX1032-NEXT: .LBB17_5:
; GFX1032-NEXT: s_endpgm
@@ -13497,7 +13497,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b32 s14, s33
; GFX1164-NEXT: s_mov_b32 s32, 32
; GFX1164-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-NEXT: v_mov_b32_e32 v41, 0
@@ -13527,7 +13527,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: ; %bb.3:
; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
; GFX1164-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1
@@ -13551,7 +13551,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-NEXT: s_mov_b32 s12, s51
; GFX1164-NEXT: s_mov_b32 s13, s50
@@ -13565,8 +13565,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-NEXT: s_cbranch_execnz .LBB17_4
; GFX1164-NEXT: .LBB17_5:
; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13595,7 +13595,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: s_mov_b32 s32, 32
; GFX1132-NEXT: s_mov_b32 s33, s15
; GFX1132-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-NEXT: v_mov_b32_e32 v41, 0
@@ -13615,7 +13615,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
-; GFX1132-NEXT: s_mov_b32 s62, 0
+; GFX1132-NEXT: s_mov_b32 s54, 0
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
@@ -13644,7 +13644,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-NEXT: s_mov_b32 s12, s51
; GFX1132-NEXT: s_mov_b32 s13, s50
@@ -13657,8 +13657,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-NEXT: s_cbranch_execnz .LBB17_4
; GFX1132-NEXT: .LBB17_5:
; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2
@@ -13667,22 +13667,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX7LESS-DPP: ; %bb.0:
; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800
-; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1
-; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000
-; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
+; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000
+; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11
+; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0
; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10
; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9
; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8
; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1]
-; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9
-; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000
-; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1
+; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9
+; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000
+; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
@@ -13700,23 +13700,23 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1
-; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start
; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0)
; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41]
-; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4
-; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0
+; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0
; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44
-; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12
-; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8
+; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12
+; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8
; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0
; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1]
; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
@@ -13730,37 +13730,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51
; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50
; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33
; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67]
; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0)
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64
-; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52
+; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53
; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0
-; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4
+; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0
+; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4
; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53]
-; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53]
+; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1
; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX7LESS-DPP-NEXT: s_endpgm
;
; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp:
; GFX9-DPP: ; %bb.0:
-; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0
-; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1
-; GFX9-DPP-NEXT: s_mov_b32 s82, -1
-; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000
-; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11
-; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0
+; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0
+; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1
+; GFX9-DPP-NEXT: s_mov_b32 s70, -1
+; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000
+; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11
+; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0
; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5]
; GFX9-DPP-NEXT: s_mov_b32 s51, s8
; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44
@@ -13775,17 +13775,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-DPP-NEXT: s_mov_b32 s33, s10
; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7]
-; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -13839,10 +13839,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX9-DPP-NEXT: ; %bb.1:
-; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24
+; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24
; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63]
+; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55]
; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-DPP-NEXT: s_waitcnt vmcnt(0)
@@ -13853,31 +13853,31 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4
; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12
; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0
-; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81]
-; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4
-; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0
+; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69]
+; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4
+; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0
; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12
-; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8
-; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12
+; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8
+; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-DPP-NEXT: s_mov_b32 s12, s51
; GFX9-DPP-NEXT: s_mov_b32 s13, s50
; GFX9-DPP-NEXT: s_mov_b32 s14, s33
; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40
-; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83]
+; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71]
; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62
-; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63
+; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54
+; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55
; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8
; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0
-; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4
+; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0
+; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4
; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65]
@@ -13908,7 +13908,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1064-DPP-NEXT: s_mov_b32 s33, s10
; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -13964,7 +13964,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1064-DPP-NEXT: ; %bb.1:
; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24
-; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53]
; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start
@@ -13988,7 +13988,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b32 s12, s51
; GFX1064-DPP-NEXT: s_mov_b32 s13, s50
@@ -14005,8 +14005,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63]
+; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55]
; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1064-DPP-NEXT: .LBB17_3:
; GFX1064-DPP-NEXT: s_endpgm
@@ -14033,7 +14033,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX1032-DPP-NEXT: s_mov_b32 s33, s10
; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
@@ -14078,7 +14078,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8
; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1032-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1032-DPP-NEXT: s_mov_b32 s54, 0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3
@@ -14107,7 +14107,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65]
; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b32 s12, s51
; GFX1032-DPP-NEXT: s_mov_b32 s13, s50
@@ -14124,8 +14124,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4
; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62
+; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54
; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1032-DPP-NEXT: .LBB17_3:
; GFX1032-DPP-NEXT: s_endpgm
@@ -14153,7 +14153,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b32 s14, s33
; GFX1164-DPP-NEXT: s_mov_b32 s32, 32
; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -14208,7 +14208,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3
; GFX1164-DPP-NEXT: ; %bb.1:
; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24
-; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0
+; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53]
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1
@@ -14232,7 +14232,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0
; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1164-DPP-NEXT: s_mov_b32 s12, s51
; GFX1164-DPP-NEXT: s_mov_b32 s13, s50
@@ -14246,8 +14246,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63]
-; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63]
+; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55]
+; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55]
; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1164-DPP-NEXT: .LBB17_3:
; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2
@@ -14276,7 +14276,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_mov_b32 s32, 32
; GFX1132-DPP-NEXT: s_mov_b32 s33, s15
; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0
-; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3]
+; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3]
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -14318,7 +14318,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9
-; GFX1132-DPP-NEXT: s_mov_b32 s62, 0
+; GFX1132-DPP-NEXT: s_mov_b32 s54, 0
; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1
; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3
@@ -14344,7 +14344,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0
; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37]
; GFX1132-DPP-NEXT: s_mov_b32 s12, s51
; GFX1132-DPP-NEXT: s_mov_b32 s13, s50
@@ -14357,8 +14357,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0
; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62
-; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62
+; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54
+; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54
; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2
; GFX1132-DPP-NEXT: .LBB17_3:
; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
index 8ae89ad96a16b..da1175c02e94a 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir
@@ -33,13 +33,9 @@ body: |
; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr15
; CHECK-NEXT: renamable $sgpr50 = COPY $sgpr14
; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY $sgpr6_sgpr7
; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: renamable $sgpr62_sgpr63 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
- ; CHECK-NEXT: renamable $sgpr64 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr65 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr66 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr67 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
; CHECK-NEXT: renamable $sgpr68 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr69 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr70 = S_MOV_B32 0
@@ -57,14 +53,18 @@ body: |
; CHECK-NEXT: renamable $sgpr82 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr83 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr84 = S_MOV_B32 0
- ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr85 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr86 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr87 = S_MOV_B32 0
+ ; CHECK-NEXT: renamable $sgpr88 = S_MOV_B32 0
+ ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr52_sgpr53 = IMPLICIT_DEF
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr48_sgpr49
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr46_sgpr47
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr38_sgpr39
; CHECK-NEXT: $sgpr8_sgpr9 = COPY killed renamable $sgpr34_sgpr35
; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37
; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr50
@@ -76,47 +76,47 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
- ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr63, 1, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99
+ ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr65, 1, implicit-def dead $scc
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95:0x0000000000000003
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
- ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr64
- ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr64
+ ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr68
; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -124,10 +124,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr62_sgpr63:0x0000000000000003
+ ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr64_sgpr65:0x0000000000000003
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc
- ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr62, 1, implicit-def dead $scc
+ ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr64, 1, implicit-def dead $scc
; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 1a8557d25fb92..53c4a9cd229aa 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -12,17 +12,19 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: v_writelane_b32 v5, s36, 0
; CHECK-NEXT: v_writelane_b32 v5, s37, 1
-; CHECK-NEXT: v_writelane_b32 v5, s46, 2
-; CHECK-NEXT: v_writelane_b32 v5, s47, 3
+; CHECK-NEXT: v_writelane_b32 v5, s38, 2
+; CHECK-NEXT: v_writelane_b32 v5, s39, 3
; CHECK-NEXT: v_writelane_b32 v5, s48, 4
; CHECK-NEXT: v_writelane_b32 v5, s49, 5
; CHECK-NEXT: v_writelane_b32 v5, s50, 6
; CHECK-NEXT: v_writelane_b32 v5, s51, 7
-; CHECK-NEXT: s_getpc_b64 s[24:25]
; CHECK-NEXT: v_writelane_b32 v5, s52, 8
+; CHECK-NEXT: v_writelane_b32 v5, s53, 9
+; CHECK-NEXT: s_getpc_b64 s[24:25]
+; CHECK-NEXT: v_writelane_b32 v5, s54, 10
; CHECK-NEXT: s_movk_i32 s4, 0xf0
; CHECK-NEXT: s_mov_b32 s5, s24
-; CHECK-NEXT: v_writelane_b32 v5, s53, 9
+; CHECK-NEXT: v_writelane_b32 v5, s55, 11
; CHECK-NEXT: s_load_dwordx16 s[44:59], s[4:5], 0x0
; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; CHECK-NEXT: s_mov_b64 s[4:5], 0
@@ -77,10 +79,10 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_mov_b32 s27, s24
; CHECK-NEXT: v_writelane_b32 v7, s19, 31
; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0
-; CHECK-NEXT: v_writelane_b32 v5, s62, 10
-; CHECK-NEXT: v_writelane_b32 v5, s63, 11
; CHECK-NEXT: v_writelane_b32 v5, s64, 12
; CHECK-NEXT: v_writelane_b32 v5, s65, 13
+; CHECK-NEXT: v_writelane_b32 v5, s66, 14
+; CHECK-NEXT: s_movk_i32 s28, 0x1f0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v7, s4, 32
; CHECK-NEXT: v_writelane_b32 v7, s5, 33
@@ -88,16 +90,14 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v7, s7, 35
; CHECK-NEXT: v_writelane_b32 v7, s8, 36
; CHECK-NEXT: v_writelane_b32 v7, s9, 37
-; CHECK-NEXT: v_writelane_b32 v5, s66, 14
-; CHECK-NEXT: s_movk_i32 s28, 0x1f0
-; CHECK-NEXT: s_movk_i32 s70, 0x2f0
+; CHECK-NEXT: s_movk_i32 s72, 0x2f0
; CHECK-NEXT: s_mov_b32 s29, s24
-; CHECK-NEXT: s_mov_b32 s71, s24
+; CHECK-NEXT: s_mov_b32 s73, s24
; CHECK-NEXT: v_writelane_b32 v7, s10, 38
; CHECK-NEXT: v_writelane_b32 v5, s67, 15
; CHECK-NEXT: v_writelane_b32 v7, s11, 39
; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0
-; CHECK-NEXT: s_load_dwordx16 s[4:19], s[70:71], 0x0
+; CHECK-NEXT: s_load_dwordx16 s[4:19], s[72:73], 0x0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1
@@ -128,19 +128,19 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s43, v7, 7
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_readlane_b32 s36, v7, 32
-; CHECK-NEXT: v_readlane_b32 s40, v7, 36
-; CHECK-NEXT: v_readlane_b32 s41, v7, 37
-; CHECK-NEXT: v_readlane_b32 s42, v7, 38
-; CHECK-NEXT: v_readlane_b32 s43, v7, 39
+; CHECK-NEXT: v_readlane_b32 s40, v7, 32
+; CHECK-NEXT: v_readlane_b32 s44, v7, 36
+; CHECK-NEXT: v_readlane_b32 s45, v7, 37
+; CHECK-NEXT: v_readlane_b32 s46, v7, 38
+; CHECK-NEXT: v_readlane_b32 s47, v7, 39
; CHECK-NEXT: s_mov_b32 s21, s20
; CHECK-NEXT: s_mov_b32 s22, s20
; CHECK-NEXT: s_mov_b32 s23, s20
-; CHECK-NEXT: v_readlane_b32 s37, v7, 33
-; CHECK-NEXT: v_readlane_b32 s38, v7, 34
+; CHECK-NEXT: v_readlane_b32 s41, v7, 33
+; CHECK-NEXT: v_readlane_b32 s42, v7, 34
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1
-; CHECK-NEXT: v_readlane_b32 s39, v7, 35
+; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[44:47] dmask:0x1
+; CHECK-NEXT: v_readlane_b32 s43, v7, 35
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4
@@ -320,16 +320,16 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: v_readlane_b32 s66, v5, 14
; CHECK-NEXT: v_readlane_b32 s65, v5, 13
; CHECK-NEXT: v_readlane_b32 s64, v5, 12
-; CHECK-NEXT: v_readlane_b32 s63, v5, 11
-; CHECK-NEXT: v_readlane_b32 s62, v5, 10
+; CHECK-NEXT: v_readlane_b32 s55, v5, 11
+; CHECK-NEXT: v_readlane_b32 s54, v5, 10
; CHECK-NEXT: v_readlane_b32 s53, v5, 9
; CHECK-NEXT: v_readlane_b32 s52, v5, 8
; CHECK-NEXT: v_readlane_b32 s51, v5, 7
; CHECK-NEXT: v_readlane_b32 s50, v5, 6
; CHECK-NEXT: v_readlane_b32 s49, v5, 5
; CHECK-NEXT: v_readlane_b32 s48, v5, 4
-; CHECK-NEXT: v_readlane_b32 s47, v5, 3
-; CHECK-NEXT: v_readlane_b32 s46, v5, 2
+; CHECK-NEXT: v_readlane_b32 s39, v5, 3
+; CHECK-NEXT: v_readlane_b32 s38, v5, 2
; CHECK-NEXT: v_readlane_b32 s37, v5, 1
; CHECK-NEXT: v_readlane_b32 s36, v5, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index 8487e195de8e2..d7c4f6afbdade 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -134,16 +134,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: v_writelane_b32 v40, s64, 16
; GCN-NEXT: v_writelane_b32 v40, s65, 17
; GCN-NEXT: s_mov_b32 s50, s15
@@ -152,16 +152,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
-; GCN-NEXT: s_mov_b64 s[62:63], exec
+; GCN-NEXT: s_mov_b64 s[54:55], exec
; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s53
@@ -174,19 +174,19 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: s_xor_b64 exec, exec, s[64:65]
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[62:63]
+; GCN-NEXT: s_mov_b64 exec, s[54:55]
; GCN-NEXT: v_readlane_b32 s65, v40, 17
; GCN-NEXT: v_readlane_b32 s64, v40, 16
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -218,16 +218,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: v_writelane_b32 v40, s64, 16
; GISEL-NEXT: v_writelane_b32 v40, s65, 17
; GISEL-NEXT: s_mov_b32 s50, s15
@@ -236,16 +236,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
-; GISEL-NEXT: s_mov_b64 s[62:63], exec
+; GISEL-NEXT: s_mov_b64 s[54:55], exec
; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
; GISEL-NEXT: s_mov_b32 s12, s53
@@ -258,19 +258,19 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65]
; GISEL-NEXT: s_cbranch_execnz .LBB2_1
; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[62:63]
+; GISEL-NEXT: s_mov_b64 exec, s[54:55]
; GISEL-NEXT: v_readlane_b32 s65, v40, 17
; GISEL-NEXT: v_readlane_b32 s64, v40, 16
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -306,16 +306,16 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: v_writelane_b32 v40, s64, 16
; GCN-NEXT: v_writelane_b32 v40, s65, 17
; GCN-NEXT: s_mov_b32 s50, s15
@@ -324,9 +324,9 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
-; GCN-NEXT: s_mov_b64 s[62:63], exec
+; GCN-NEXT: s_mov_b64 s[54:55], exec
; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
@@ -334,7 +334,7 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s53
@@ -349,19 +349,19 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GCN-NEXT: s_xor_b64 exec, exec, s[64:65]
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[62:63]
+; GCN-NEXT: s_mov_b64 exec, s[54:55]
; GCN-NEXT: v_readlane_b32 s65, v40, 17
; GCN-NEXT: v_readlane_b32 s64, v40, 16
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -393,16 +393,16 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: v_writelane_b32 v40, s64, 16
; GISEL-NEXT: v_writelane_b32 v40, s65, 17
; GISEL-NEXT: s_mov_b32 s50, s15
@@ -411,9 +411,9 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
-; GISEL-NEXT: s_mov_b64 s[62:63], exec
+; GISEL-NEXT: s_mov_b64 s[54:55], exec
; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
@@ -421,7 +421,7 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
; GISEL-NEXT: s_mov_b32 s12, s53
@@ -434,19 +434,19 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65]
; GISEL-NEXT: s_cbranch_execnz .LBB3_1
; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[62:63]
+; GISEL-NEXT: s_mov_b64 exec, s[54:55]
; GISEL-NEXT: v_readlane_b32 s65, v40, 17
; GISEL-NEXT: v_readlane_b32 s64, v40, 16
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -482,16 +482,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: v_writelane_b32 v40, s64, 16
; GCN-NEXT: v_writelane_b32 v40, s65, 17
; GCN-NEXT: s_mov_b32 s50, s15
@@ -500,16 +500,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
-; GCN-NEXT: s_mov_b64 s[62:63], exec
+; GCN-NEXT: s_mov_b64 s[54:55], exec
; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s53
@@ -523,20 +523,20 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GCN-NEXT: s_xor_b64 exec, exec, s[64:65]
; GCN-NEXT: s_cbranch_execnz .LBB4_1
; GCN-NEXT: ; %bb.2:
-; GCN-NEXT: s_mov_b64 exec, s[62:63]
+; GCN-NEXT: s_mov_b64 exec, s[54:55]
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
; GCN-NEXT: v_readlane_b32 s65, v40, 17
; GCN-NEXT: v_readlane_b32 s64, v40, 16
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -568,16 +568,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: v_writelane_b32 v40, s64, 16
; GISEL-NEXT: v_writelane_b32 v40, s65, 17
; GISEL-NEXT: s_mov_b32 s50, s15
@@ -586,16 +586,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
-; GISEL-NEXT: s_mov_b64 s[62:63], exec
+; GISEL-NEXT: s_mov_b64 s[54:55], exec
; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
; GISEL-NEXT: s_mov_b32 s12, s53
@@ -609,20 +609,20 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65]
; GISEL-NEXT: s_cbranch_execnz .LBB4_1
; GISEL-NEXT: ; %bb.2:
-; GISEL-NEXT: s_mov_b64 exec, s[62:63]
+; GISEL-NEXT: s_mov_b64 exec, s[54:55]
; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1
; GISEL-NEXT: v_readlane_b32 s65, v40, 17
; GISEL-NEXT: v_readlane_b32 s64, v40, 16
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -659,16 +659,16 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: v_writelane_b32 v40, s64, 16
; GCN-NEXT: v_writelane_b32 v40, s65, 17
; GCN-NEXT: v_writelane_b32 v40, s66, 18
@@ -679,11 +679,11 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GCN-NEXT: s_mov_b32 s53, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT: s_and_saveexec_b64 s[62:63], vcc
+; GCN-NEXT: s_and_saveexec_b64 s[54:55], vcc
; GCN-NEXT: s_cbranch_execz .LBB5_4
; GCN-NEXT: ; %bb.1: ; %bb1
; GCN-NEXT: s_mov_b64 s[64:65], exec
@@ -693,7 +693,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[66:67], vcc
; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s53
@@ -708,21 +708,21 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GCN-NEXT: ; %bb.3:
; GCN-NEXT: s_mov_b64 exec, s[64:65]
; GCN-NEXT: .LBB5_4: ; %bb2
-; GCN-NEXT: s_or_b64 exec, exec, s[62:63]
+; GCN-NEXT: s_or_b64 exec, exec, s[54:55]
; GCN-NEXT: v_readlane_b32 s67, v40, 19
; GCN-NEXT: v_readlane_b32 s66, v40, 18
; GCN-NEXT: v_readlane_b32 s65, v40, 17
; GCN-NEXT: v_readlane_b32 s64, v40, 16
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -754,16 +754,16 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: v_writelane_b32 v40, s64, 16
; GISEL-NEXT: v_writelane_b32 v40, s65, 17
; GISEL-NEXT: v_writelane_b32 v40, s66, 18
@@ -774,11 +774,11 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GISEL-NEXT: s_mov_b32 s53, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5]
; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT: s_and_saveexec_b64 s[62:63], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[54:55], vcc
; GISEL-NEXT: s_cbranch_execz .LBB5_4
; GISEL-NEXT: ; %bb.1: ; %bb1
; GISEL-NEXT: s_mov_b64 s[64:65], exec
@@ -788,7 +788,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[66:67], vcc
; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
; GISEL-NEXT: s_mov_b32 s12, s53
@@ -803,21 +803,21 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
; GISEL-NEXT: ; %bb.3:
; GISEL-NEXT: s_mov_b64 exec, s[64:65]
; GISEL-NEXT: .LBB5_4: ; %bb2
-; GISEL-NEXT: s_or_b64 exec, exec, s[62:63]
+; GISEL-NEXT: s_or_b64 exec, exec, s[54:55]
; GISEL-NEXT: v_readlane_b32 s67, v40, 19
; GISEL-NEXT: v_readlane_b32 s66, v40, 18
; GISEL-NEXT: v_readlane_b32 s65, v40, 17
; GISEL-NEXT: v_readlane_b32 s64, v40, 16
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -859,16 +859,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s8, v0
@@ -882,16 +882,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GCN-NEXT: s_cbranch_execnz .LBB6_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -921,16 +921,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: s_mov_b64 s[6:7], exec
; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v0
@@ -944,16 +944,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
; GISEL-NEXT: s_cbranch_execnz .LBB6_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -988,16 +988,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v41, s35, 3
; GCN-NEXT: v_writelane_b32 v41, s36, 4
; GCN-NEXT: v_writelane_b32 v41, s37, 5
-; GCN-NEXT: v_writelane_b32 v41, s46, 6
-; GCN-NEXT: v_writelane_b32 v41, s47, 7
+; GCN-NEXT: v_writelane_b32 v41, s38, 6
+; GCN-NEXT: v_writelane_b32 v41, s39, 7
; GCN-NEXT: v_writelane_b32 v41, s48, 8
; GCN-NEXT: v_writelane_b32 v41, s49, 9
; GCN-NEXT: v_writelane_b32 v41, s50, 10
; GCN-NEXT: v_writelane_b32 v41, s51, 11
; GCN-NEXT: v_writelane_b32 v41, s52, 12
; GCN-NEXT: v_writelane_b32 v41, s53, 13
-; GCN-NEXT: v_writelane_b32 v41, s62, 14
-; GCN-NEXT: v_writelane_b32 v41, s63, 15
+; GCN-NEXT: v_writelane_b32 v41, s54, 14
+; GCN-NEXT: v_writelane_b32 v41, s55, 15
; GCN-NEXT: v_mov_b32_e32 v40, v0
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
@@ -1013,16 +1013,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v40
-; GCN-NEXT: v_readlane_b32 s63, v41, 15
-; GCN-NEXT: v_readlane_b32 s62, v41, 14
+; GCN-NEXT: v_readlane_b32 s55, v41, 15
+; GCN-NEXT: v_readlane_b32 s54, v41, 14
; GCN-NEXT: v_readlane_b32 s53, v41, 13
; GCN-NEXT: v_readlane_b32 s52, v41, 12
; GCN-NEXT: v_readlane_b32 s51, v41, 11
; GCN-NEXT: v_readlane_b32 s50, v41, 10
; GCN-NEXT: v_readlane_b32 s49, v41, 9
; GCN-NEXT: v_readlane_b32 s48, v41, 8
-; GCN-NEXT: v_readlane_b32 s47, v41, 7
-; GCN-NEXT: v_readlane_b32 s46, v41, 6
+; GCN-NEXT: v_readlane_b32 s39, v41, 7
+; GCN-NEXT: v_readlane_b32 s38, v41, 6
; GCN-NEXT: v_readlane_b32 s37, v41, 5
; GCN-NEXT: v_readlane_b32 s36, v41, 4
; GCN-NEXT: v_readlane_b32 s35, v41, 3
@@ -1054,16 +1054,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v41, s35, 3
; GISEL-NEXT: v_writelane_b32 v41, s36, 4
; GISEL-NEXT: v_writelane_b32 v41, s37, 5
-; GISEL-NEXT: v_writelane_b32 v41, s46, 6
-; GISEL-NEXT: v_writelane_b32 v41, s47, 7
+; GISEL-NEXT: v_writelane_b32 v41, s38, 6
+; GISEL-NEXT: v_writelane_b32 v41, s39, 7
; GISEL-NEXT: v_writelane_b32 v41, s48, 8
; GISEL-NEXT: v_writelane_b32 v41, s49, 9
; GISEL-NEXT: v_writelane_b32 v41, s50, 10
; GISEL-NEXT: v_writelane_b32 v41, s51, 11
; GISEL-NEXT: v_writelane_b32 v41, s52, 12
; GISEL-NEXT: v_writelane_b32 v41, s53, 13
-; GISEL-NEXT: v_writelane_b32 v41, s62, 14
-; GISEL-NEXT: v_writelane_b32 v41, s63, 15
+; GISEL-NEXT: v_writelane_b32 v41, s54, 14
+; GISEL-NEXT: v_writelane_b32 v41, s55, 15
; GISEL-NEXT: v_mov_b32_e32 v40, v0
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
@@ -1079,16 +1079,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v40
-; GISEL-NEXT: v_readlane_b32 s63, v41, 15
-; GISEL-NEXT: v_readlane_b32 s62, v41, 14
+; GISEL-NEXT: v_readlane_b32 s55, v41, 15
+; GISEL-NEXT: v_readlane_b32 s54, v41, 14
; GISEL-NEXT: v_readlane_b32 s53, v41, 13
; GISEL-NEXT: v_readlane_b32 s52, v41, 12
; GISEL-NEXT: v_readlane_b32 s51, v41, 11
; GISEL-NEXT: v_readlane_b32 s50, v41, 10
; GISEL-NEXT: v_readlane_b32 s49, v41, 9
; GISEL-NEXT: v_readlane_b32 s48, v41, 8
-; GISEL-NEXT: v_readlane_b32 s47, v41, 7
-; GISEL-NEXT: v_readlane_b32 s46, v41, 6
+; GISEL-NEXT: v_readlane_b32 s39, v41, 7
+; GISEL-NEXT: v_readlane_b32 s38, v41, 6
; GISEL-NEXT: v_readlane_b32 s37, v41, 5
; GISEL-NEXT: v_readlane_b32 s36, v41, 4
; GISEL-NEXT: v_readlane_b32 s35, v41, 3
@@ -1127,16 +1127,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s8, v1
@@ -1152,16 +1152,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v3
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -1191,16 +1191,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
@@ -1216,16 +1216,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v2
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
@@ -1260,16 +1260,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
-; GCN-NEXT: v_writelane_b32 v40, s46, 6
-; GCN-NEXT: v_writelane_b32 v40, s47, 7
+; GCN-NEXT: v_writelane_b32 v40, s38, 6
+; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s48, 8
; GCN-NEXT: v_writelane_b32 v40, s49, 9
; GCN-NEXT: v_writelane_b32 v40, s50, 10
; GCN-NEXT: v_writelane_b32 v40, s51, 11
; GCN-NEXT: v_writelane_b32 v40, s52, 12
; GCN-NEXT: v_writelane_b32 v40, s53, 13
-; GCN-NEXT: v_writelane_b32 v40, s62, 14
-; GCN-NEXT: v_writelane_b32 v40, s63, 15
+; GCN-NEXT: v_writelane_b32 v40, s54, 14
+; GCN-NEXT: v_writelane_b32 v40, s55, 15
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s6, v0
@@ -1282,16 +1282,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GCN-NEXT: s_cbranch_execnz .LBB9_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_readlane_b32 s63, v40, 15
-; GCN-NEXT: v_readlane_b32 s62, v40, 14
+; GCN-NEXT: v_readlane_b32 s55, v40, 15
+; GCN-NEXT: v_readlane_b32 s54, v40, 14
; GCN-NEXT: v_readlane_b32 s53, v40, 13
; GCN-NEXT: v_readlane_b32 s52, v40, 12
; GCN-NEXT: v_readlane_b32 s51, v40, 11
; GCN-NEXT: v_readlane_b32 s50, v40, 10
; GCN-NEXT: v_readlane_b32 s49, v40, 9
; GCN-NEXT: v_readlane_b32 s48, v40, 8
-; GCN-NEXT: v_readlane_b32 s47, v40, 7
-; GCN-NEXT: v_readlane_b32 s46, v40, 6
+; GCN-NEXT: v_readlane_b32 s39, v40, 7
+; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
@@ -1321,16 +1321,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
-; GISEL-NEXT: v_writelane_b32 v40, s46, 6
-; GISEL-NEXT: v_writelane_b32 v40, s47, 7
+; GISEL-NEXT: v_writelane_b32 v40, s38, 6
+; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s48, 8
; GISEL-NEXT: v_writelane_b32 v40, s49, 9
; GISEL-NEXT: v_writelane_b32 v40, s50, 10
; GISEL-NEXT: v_writelane_b32 v40, s51, 11
; GISEL-NEXT: v_writelane_b32 v40, s52, 12
; GISEL-NEXT: v_writelane_b32 v40, s53, 13
-; GISEL-NEXT: v_writelane_b32 v40, s62, 14
-; GISEL-NEXT: v_writelane_b32 v40, s63, 15
+; GISEL-NEXT: v_writelane_b32 v40, s54, 14
+; GISEL-NEXT: v_writelane_b32 v40, s55, 15
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s6, v0
@@ -1343,16 +1343,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
; GISEL-NEXT: s_cbranch_execnz .LBB9_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
-; GISEL-NEXT: v_readlane_b32 s63, v40, 15
-; GISEL-NEXT: v_readlane_b32 s62, v40, 14
+; GISEL-NEXT: v_readlane_b32 s55, v40, 15
+; GISEL-NEXT: v_readlane_b32 s54, v40, 14
; GISEL-NEXT: v_readlane_b32 s53, v40, 13
; GISEL-NEXT: v_readlane_b32 s52, v40, 12
; GISEL-NEXT: v_readlane_b32 s51, v40, 11
; GISEL-NEXT: v_readlane_b32 s50, v40, 10
; GISEL-NEXT: v_readlane_b32 s49, v40, 9
; GISEL-NEXT: v_readlane_b32 s48, v40, 8
-; GISEL-NEXT: v_readlane_b32 s47, v40, 7
-; GISEL-NEXT: v_readlane_b32 s46, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 7
+; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir
index 55de5dd133700..b447272702641 100644
--- a/llvm/test/CodeGen/AMDGPU/issue48473.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir
@@ -43,7 +43,7 @@
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
# CHECK-LABEL: name: issue48473
-# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+# CHECK: S_NOP 0, implicit killed renamable $sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, implicit killed renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, implicit killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
---
name: issue48473
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index 4fd9fc95b8532..dbe95a8091932 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -113,20 +113,20 @@ exit:
define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
; GFX9-SDAG-LABEL: test_call:
; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-SDAG-NEXT: s_mov_b32 s50, -1
-; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s2
-; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-SDAG-NEXT: s_mov_b32 s38, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s2
+; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0
; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1]
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-SDAG-NEXT: s_mov_b32 s6, src_pops_exiting_wave_id
-; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], 36
-; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -135,20 +135,20 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
;
; GFX9-GISEL-LABEL: test_call:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-GISEL-NEXT: s_mov_b32 s50, -1
-; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s2
-; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-GISEL-NEXT: s_mov_b32 s38, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s2
+; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0
; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1]
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], 36
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -157,12 +157,12 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
;
; GFX10-LABEL: test_call:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s2
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s2
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12
@@ -171,8 +171,8 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) {
; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
index c6a412a9f88b0..8b1ba393c8de8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll
@@ -1738,7 +1738,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_max_f32_e32 v19, v0, v16
; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX7-NEXT: v_max_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30
; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX7-NEXT: v_max_f32_e32 v4, v4, v20
; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1760,7 +1760,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX7-NEXT: v_max_f32_e32 v13, v13, v29
; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41]
; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1794,7 +1794,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_max_f32_e32 v19, v0, v16
; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX8-NEXT: v_max_f32_e32 v16, v14, v30
-; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30
; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX8-NEXT: v_max_f32_e32 v4, v4, v20
; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1816,7 +1816,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX8-NEXT: v_max_f32_e32 v13, v13, v29
; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41]
; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1850,7 +1850,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_max_f32_e32 v19, v0, v16
; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX900-NEXT: v_max_f32_e32 v16, v14, v30
-; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30
; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX900-NEXT: v_max_f32_e32 v4, v4, v20
; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1872,7 +1872,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX900-NEXT: v_max_f32_e32 v13, v13, v29
; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41]
; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index f7ce72efa4373..3344c73f9eb6f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -2095,14 +2095,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2119,9 +2119,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_v16f64:
@@ -2214,14 +2214,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2238,9 +2238,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_maximum_v16f64:
@@ -2333,14 +2333,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32]
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33]
; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2357,9 +2357,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
-; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
-; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
index 7fe4f9be2727d..7b2998cbd242f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll
@@ -1738,7 +1738,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_min_f32_e32 v19, v0, v16
; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX7-NEXT: v_min_f32_e32 v16, v14, v30
-; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
+; GFX7-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30
; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX7-NEXT: v_min_f32_e32 v4, v4, v20
; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1760,7 +1760,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX7-NEXT: v_min_f32_e32 v13, v13, v29
; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
+; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41]
; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1794,7 +1794,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_min_f32_e32 v19, v0, v16
; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX8-NEXT: v_min_f32_e32 v16, v14, v30
-; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
+; GFX8-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30
; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX8-NEXT: v_min_f32_e32 v4, v4, v20
; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1816,7 +1816,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX8-NEXT: v_min_f32_e32 v13, v13, v29
; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41]
; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
@@ -1850,7 +1850,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_min_f32_e32 v19, v0, v16
; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16
; GFX900-NEXT: v_min_f32_e32 v16, v14, v30
-; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30
+; GFX900-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30
; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20
; GFX900-NEXT: v_min_f32_e32 v4, v4, v20
; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21
@@ -1872,7 +1872,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) {
; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29
; GFX900-NEXT: v_min_f32_e32 v13, v13, v29
; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
-; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39]
+; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41]
; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29]
; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index ab20fd88091d9..1d1673315f6ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -2095,14 +2095,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
+; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
+; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2119,9 +2119,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
-; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
-; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
+; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
+; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
+; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimum_v16f64:
@@ -2214,14 +2214,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
+; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
+; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2238,9 +2238,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
-; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
-; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
+; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
+; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimum_v16f64:
@@ -2333,14 +2333,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116
; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32]
; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32]
; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128
; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124
-; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39]
+; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41]
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33]
+; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33]
; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33]
; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc
@@ -2357,9 +2357,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25]
; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27]
; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29]
-; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39]
-; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41]
-; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41]
+; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41]
+; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43]
+; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43]
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 55e1c3842aa6f..989ef6f981d9d 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -68,12 +68,12 @@ define amdgpu_kernel void @workgroup_ids_kernel() {
define amdgpu_kernel void @caller() {
; GFX9-SDAG-LABEL: caller:
; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-SDAG-NEXT: s_mov_b32 s50, -1
-; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s11
-; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-SDAG-NEXT: s_mov_b32 s38, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s11
+; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0
; GFX9-SDAG-NEXT: s_mov_b32 s12, s8
; GFX9-SDAG-NEXT: s_add_u32 s8, s4, 36
; GFX9-SDAG-NEXT: s_addc_u32 s9, s5, 0
@@ -86,9 +86,9 @@ define amdgpu_kernel void @caller() {
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12
; GFX9-SDAG-NEXT: s_mov_b32 s32, 0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -97,12 +97,12 @@ define amdgpu_kernel void @caller() {
;
; GFX9-GISEL-LABEL: caller:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-GISEL-NEXT: s_mov_b32 s50, -1
-; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s11
-; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-GISEL-NEXT: s_mov_b32 s38, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s11
+; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0
; GFX9-GISEL-NEXT: s_mov_b32 s14, s8
; GFX9-GISEL-NEXT: s_add_u32 s8, s4, 36
; GFX9-GISEL-NEXT: s_addc_u32 s9, s5, 0
@@ -115,10 +115,10 @@ define amdgpu_kernel void @caller() {
; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s14
-; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13]
; GFX9-GISEL-NEXT: s_mov_b32 s12, s14
; GFX9-GISEL-NEXT: s_mov_b32 s32, 0
@@ -128,12 +128,12 @@ define amdgpu_kernel void @caller() {
;
; GFX9ARCH-SDAG-LABEL: caller:
; GFX9ARCH-SDAG: ; %bb.0:
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s50, -1
-; GFX9ARCH-SDAG-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9ARCH-SDAG-NEXT: s_add_u32 s48, s48, s8
-; GFX9ARCH-SDAG-NEXT: s_addc_u32 s49, s49, 0
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1
+; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s8
+; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0
; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s4, 36
; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s5, 0
; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[4:5]
@@ -145,9 +145,9 @@ define amdgpu_kernel void @caller() {
; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2
-; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0
; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -156,12 +156,12 @@ define amdgpu_kernel void @caller() {
;
; GFX9ARCH-GISEL-LABEL: caller:
; GFX9ARCH-GISEL: ; %bb.0:
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s50, -1
-; GFX9ARCH-GISEL-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9ARCH-GISEL-NEXT: s_add_u32 s48, s48, s8
-; GFX9ARCH-GISEL-NEXT: s_addc_u32 s49, s49, 0
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1
+; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s8
+; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0
; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s4, 36
; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s5, 0
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[12:13], s[0:1]
@@ -173,10 +173,10 @@ define amdgpu_kernel void @caller() {
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
-; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13]
; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0
; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index d29e6f8c3d2c6..4fb6a0114b499 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -43,7 +43,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0
+; CHECK-NEXT: s_load_dwordx8 s[64:71], s[8:9], 0x0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_addc_u32 s1, s1, 0
@@ -63,7 +63,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_mov_b32 s50, s15
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
-; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v45, 0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
@@ -74,7 +74,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
@@ -88,7 +88,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
@@ -105,10 +105,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0
; CHECK-NEXT: v_and_b32_e32 v1, 28, v1
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
-; CHECK-NEXT: global_load_dword v0, v0, s[100:101]
+; CHECK-NEXT: global_load_dword v0, v0, s[68:69]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
@@ -144,46 +144,46 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v43, 10, v43
; CHECK-NEXT: v_add_nc_u32_e32 v46, 0x3c05, v0
; CHECK-NEXT: v_mov_b32_e32 v47, 0
-; CHECK-NEXT: s_mov_b32 s63, 0
+; CHECK-NEXT: s_mov_b32 s55, 0
; CHECK-NEXT: .LBB0_5: ; =>This Loop Header: Depth=1
; CHECK-NEXT: ; Child Loop BB0_8 Depth 2
; CHECK-NEXT: ; Child Loop BB0_20 Depth 2
-; CHECK-NEXT: v_add_nc_u32_e32 v0, s63, v44
-; CHECK-NEXT: s_lshl_b32 s4, s63, 5
-; CHECK-NEXT: s_add_i32 s62, s63, 1
-; CHECK-NEXT: s_add_i32 s5, s63, 5
-; CHECK-NEXT: v_or3_b32 v57, s4, v43, s62
+; CHECK-NEXT: v_add_nc_u32_e32 v0, s55, v44
+; CHECK-NEXT: s_lshl_b32 s4, s55, 5
+; CHECK-NEXT: s_add_i32 s54, s55, 1
+; CHECK-NEXT: s_add_i32 s5, s55, 5
+; CHECK-NEXT: v_or3_b32 v57, s4, v43, s54
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: ds_read_u8 v56, v0
-; CHECK-NEXT: v_mov_b32_e32 v58, s62
-; CHECK-NEXT: s_mov_b32 s64, exec_lo
+; CHECK-NEXT: v_mov_b32_e32 v58, s54
+; CHECK-NEXT: s_mov_b32 s68, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 s5, v42
; CHECK-NEXT: s_cbranch_execz .LBB0_17
; CHECK-NEXT: ; %bb.6: ; %.preheader2
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_mov_b32 s65, 0
-; CHECK-NEXT: s_mov_b32 s66, 0
+; CHECK-NEXT: s_mov_b32 s69, 0
+; CHECK-NEXT: s_mov_b32 s80, 0
; CHECK-NEXT: s_branch .LBB0_8
; CHECK-NEXT: .LBB0_7: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
-; CHECK-NEXT: s_add_i32 s66, s66, 4
-; CHECK-NEXT: s_add_i32 s4, s63, s66
-; CHECK-NEXT: v_add_nc_u32_e32 v0, s66, v57
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
+; CHECK-NEXT: s_add_i32 s80, s80, 4
+; CHECK-NEXT: s_add_i32 s4, s55, s80
+; CHECK-NEXT: v_add_nc_u32_e32 v0, s80, v57
; CHECK-NEXT: s_add_i32 s5, s4, 5
; CHECK-NEXT: s_add_i32 s4, s4, 1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s5, v42
; CHECK-NEXT: v_mov_b32_e32 v58, s4
-; CHECK-NEXT: s_or_b32 s65, vcc_lo, s65
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s65
+; CHECK-NEXT: s_or_b32 s69, vcc_lo, s69
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s69
; CHECK-NEXT: s_cbranch_execz .LBB0_16
; CHECK-NEXT: .LBB0_8: ; Parent Loop BB0_5 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
-; CHECK-NEXT: v_add_nc_u32_e32 v59, s66, v46
-; CHECK-NEXT: v_add_nc_u32_e32 v58, s66, v57
+; CHECK-NEXT: v_add_nc_u32_e32 v59, s80, v46
+; CHECK-NEXT: v_add_nc_u32_e32 v58, s80, v57
; CHECK-NEXT: ds_read_u8 v0, v59
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s67, s4
+; CHECK-NEXT: s_and_saveexec_b32 s81, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.9: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -194,7 +194,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -204,11 +204,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v58
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s67, s4
+; CHECK-NEXT: s_and_saveexec_b32 s81, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_12
; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -219,7 +219,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -230,11 +230,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v60
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s67, s4
+; CHECK-NEXT: s_and_saveexec_b32 s81, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_14
; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -245,7 +245,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -256,11 +256,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v60
; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81
; CHECK-NEXT: ds_read_u8 v0, v59 offset:3
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s67, s4
+; CHECK-NEXT: s_and_saveexec_b32 s81, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_7
; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -271,7 +271,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -284,27 +284,27 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_16: ; %Flow45
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s69
; CHECK-NEXT: v_mov_b32_e32 v57, v0
; CHECK-NEXT: .LBB0_17: ; %Flow46
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64
-; CHECK-NEXT: s_mov_b32 s63, exec_lo
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68
+; CHECK-NEXT: s_mov_b32 s55, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 v58, v42
; CHECK-NEXT: s_cbranch_execz .LBB0_23
; CHECK-NEXT: ; %bb.18: ; %.preheader
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_mov_b32 s64, 0
+; CHECK-NEXT: s_mov_b32 s68, 0
; CHECK-NEXT: s_inst_prefetch 0x1
; CHECK-NEXT: s_branch .LBB0_20
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB0_19: ; in Loop: Header=BB0_20 Depth=2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s69
; CHECK-NEXT: v_add_nc_u32_e32 v58, 1, v58
; CHECK-NEXT: v_add_nc_u32_e32 v57, 1, v57
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v58, v42
-; CHECK-NEXT: s_or_b32 s64, vcc_lo, s64
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s64
+; CHECK-NEXT: s_or_b32 s68, vcc_lo, s68
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s68
; CHECK-NEXT: s_cbranch_execz .LBB0_22
; CHECK-NEXT: .LBB0_20: ; Parent Loop BB0_5 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
@@ -312,7 +312,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: ds_read_u8 v0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD
-; CHECK-NEXT: s_and_saveexec_b32 s65, s4
+; CHECK-NEXT: s_and_saveexec_b32 s69, s4
; CHECK-NEXT: s_cbranch_execz .LBB0_19
; CHECK-NEXT: ; %bb.21: ; in Loop: Header=BB0_20 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -323,7 +323,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -336,15 +336,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: .LBB0_22: ; %Flow43
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
; CHECK-NEXT: s_inst_prefetch 0x2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68
; CHECK-NEXT: .LBB0_23: ; %Flow44
; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s62, v45
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s54, v45
; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47
; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46
-; CHECK-NEXT: s_mov_b32 s63, s62
+; CHECK-NEXT: s_mov_b32 s55, s54
; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT: s_and_b32 s4, exec_lo, s4
; CHECK-NEXT: s_or_b32 s53, s4, s53
@@ -360,7 +360,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -385,7 +385,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -407,8 +407,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62
; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72
-; CHECK-NEXT: v_add_co_u32 v2, s4, s96, v1
-; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s97, 0, s4
+; CHECK-NEXT: v_add_co_u32 v2, s4, s64, v1
+; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s65, 0, s4
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
@@ -443,7 +443,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_addPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_addPU3AS1Vjj@rel32@hi+12
; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -454,8 +454,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0
; CHECK-NEXT: v_lshlrev_b32_e64 v44, v1, 1
; CHECK-NEXT: v_and_b32_e32 v74, 28, v1
-; CHECK-NEXT: v_add_co_u32 v42, s4, s102, v0
-; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s103, 0, s4
+; CHECK-NEXT: v_add_co_u32 v42, s4, s70, v0
+; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s71, 0, s4
; CHECK-NEXT: v_mov_b32_e32 v2, v44
; CHECK-NEXT: v_mov_b32_e32 v0, v42
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
@@ -469,7 +469,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_28 Depth=1
; CHECK-NEXT: v_xor_b32_e32 v4, v60, v58
; CHECK-NEXT: v_lshrrev_b64 v[2:3], 16, v[56:57]
-; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[98:99]
+; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[66:67]
; CHECK-NEXT: v_lshlrev_b32_e32 v10, 5, v0
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v4
; CHECK-NEXT: v_lshlrev_b32_e32 v8, 6, v72
@@ -504,7 +504,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12
; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
-; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47]
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
@@ -792,16 +792,16 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_addc_u32 s13, s13, 0
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
-; CHECK-NEXT: s_load_dwordx2 s[62:63], s[8:9], 0x10
+; CHECK-NEXT: s_load_dwordx2 s[54:55], s[8:9], 0x10
; CHECK-NEXT: s_add_u32 s0, s0, s17
-; CHECK-NEXT: s_mov_b64 s[46:47], s[8:9]
+; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: v_mov_b32_e32 v40, v0
-; CHECK-NEXT: s_add_u32 s52, s46, 40
+; CHECK-NEXT: s_add_u32 s52, s38, 40
; CHECK-NEXT: v_mov_b32_e32 v31, v0
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s33, s16
-; CHECK-NEXT: s_addc_u32 s53, s47, 0
+; CHECK-NEXT: s_addc_u32 s53, s39, 0
; CHECK-NEXT: s_mov_b32 s51, s14
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4
@@ -858,7 +858,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37]
; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: global_load_dword v0, v0, s[62:63]
+; CHECK-NEXT: global_load_dword v0, v0, s[54:55]
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
@@ -912,12 +912,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: .LBB1_5: ; %Flow4
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
-; CHECK-NEXT: s_mov_b32 s62, exec_lo
+; CHECK-NEXT: s_mov_b32 s54, exec_lo
; CHECK-NEXT: v_cmpx_lt_u32_e64 v56, v41
; CHECK-NEXT: s_cbranch_execz .LBB1_11
; CHECK-NEXT: ; %bb.6: ; %.103.preheader
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: s_mov_b32 s63, 0
+; CHECK-NEXT: s_mov_b32 s55, 0
; CHECK-NEXT: s_inst_prefetch 0x1
; CHECK-NEXT: s_branch .LBB1_8
; CHECK-NEXT: .p2align 6
@@ -927,8 +927,8 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: v_add_nc_u32_e32 v56, 1, v56
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v56, v41
-; CHECK-NEXT: s_or_b32 s63, vcc_lo, s63
-; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s63
+; CHECK-NEXT: s_or_b32 s55, vcc_lo, s55
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s55
; CHECK-NEXT: s_cbranch_execz .LBB1_10
; CHECK-NEXT: .LBB1_8: ; %.103
; CHECK-NEXT: ; Parent Loop BB1_1 Depth=1
@@ -943,8 +943,8 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00
-; CHECK-NEXT: s_add_u32 s8, s46, 40
-; CHECK-NEXT: s_addc_u32 s9, s47, 0
+; CHECK-NEXT: s_add_u32 s8, s38, 40
+; CHECK-NEXT: s_addc_u32 s9, s39, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12
@@ -962,10 +962,10 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: .LBB1_10: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: s_inst_prefetch 0x2
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55
; CHECK-NEXT: .LBB1_11: ; %Flow2
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s62
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s54
; CHECK-NEXT: ; %bb.12: ; %.32
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s53, v45
@@ -980,8 +980,8 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: v_mov_b32_e32 v0, 1
-; CHECK-NEXT: s_add_u32 s8, s46, 40
-; CHECK-NEXT: s_addc_u32 s9, s47, 0
+; CHECK-NEXT: s_add_u32 s8, s38, 40
+; CHECK-NEXT: s_addc_u32 s9, s39, 0
; CHECK-NEXT: s_getpc_b64 s[16:17]
; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 23b7369a11dd3..e8dacc93a8f3c 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -44,17 +44,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX7-NEXT: v_writelane_b32 v23, s35, 4
; GFX7-NEXT: v_writelane_b32 v23, s36, 5
; GFX7-NEXT: v_writelane_b32 v23, s37, 6
-; GFX7-NEXT: v_writelane_b32 v23, s46, 7
-; GFX7-NEXT: v_writelane_b32 v23, s47, 8
+; GFX7-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-NEXT: v_writelane_b32 v23, s39, 8
; GFX7-NEXT: v_writelane_b32 v23, s48, 9
; GFX7-NEXT: v_writelane_b32 v23, s49, 10
; GFX7-NEXT: v_writelane_b32 v23, s50, 11
; GFX7-NEXT: v_writelane_b32 v23, s51, 12
-; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
; GFX7-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-NEXT: v_writelane_b32 v23, s54, 15
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX7-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-NEXT: v_writelane_b32 v23, s55, 16
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use alloca0 v0
; GFX7-NEXT: ;;#ASMEND
@@ -71,14 +73,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
+; GFX7-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-NEXT: v_readlane_b32 s54, v23, 15
; GFX7-NEXT: v_readlane_b32 s53, v23, 14
; GFX7-NEXT: v_readlane_b32 s52, v23, 13
; GFX7-NEXT: v_readlane_b32 s51, v23, 12
; GFX7-NEXT: v_readlane_b32 s50, v23, 11
; GFX7-NEXT: v_readlane_b32 s49, v23, 10
; GFX7-NEXT: v_readlane_b32 s48, v23, 9
-; GFX7-NEXT: v_readlane_b32 s47, v23, 8
-; GFX7-NEXT: v_readlane_b32 s46, v23, 7
+; GFX7-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-NEXT: v_readlane_b32 s38, v23, 7
; GFX7-NEXT: v_readlane_b32 s37, v23, 6
; GFX7-NEXT: v_readlane_b32 s36, v23, 5
; GFX7-NEXT: v_readlane_b32 s35, v23, 4
@@ -107,17 +111,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX8-NEXT: v_writelane_b32 v23, s35, 4
; GFX8-NEXT: v_writelane_b32 v23, s36, 5
; GFX8-NEXT: v_writelane_b32 v23, s37, 6
-; GFX8-NEXT: v_writelane_b32 v23, s46, 7
-; GFX8-NEXT: v_writelane_b32 v23, s47, 8
+; GFX8-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-NEXT: v_writelane_b32 v23, s39, 8
; GFX8-NEXT: v_writelane_b32 v23, s48, 9
; GFX8-NEXT: v_writelane_b32 v23, s49, 10
; GFX8-NEXT: v_writelane_b32 v23, s50, 11
; GFX8-NEXT: v_writelane_b32 v23, s51, 12
-; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-NEXT: v_writelane_b32 v23, s54, 15
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-NEXT: v_writelane_b32 v23, s55, 16
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
@@ -134,14 +140,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-NEXT: v_readlane_b32 s54, v23, 15
; GFX8-NEXT: v_readlane_b32 s53, v23, 14
; GFX8-NEXT: v_readlane_b32 s52, v23, 13
; GFX8-NEXT: v_readlane_b32 s51, v23, 12
; GFX8-NEXT: v_readlane_b32 s50, v23, 11
; GFX8-NEXT: v_readlane_b32 s49, v23, 10
; GFX8-NEXT: v_readlane_b32 s48, v23, 9
-; GFX8-NEXT: v_readlane_b32 s47, v23, 8
-; GFX8-NEXT: v_readlane_b32 s46, v23, 7
+; GFX8-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-NEXT: v_readlane_b32 s38, v23, 7
; GFX8-NEXT: v_readlane_b32 s37, v23, 6
; GFX8-NEXT: v_readlane_b32 s36, v23, 5
; GFX8-NEXT: v_readlane_b32 s35, v23, 4
@@ -170,17 +178,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX900-NEXT: v_writelane_b32 v23, s35, 4
; GFX900-NEXT: v_writelane_b32 v23, s36, 5
; GFX900-NEXT: v_writelane_b32 v23, s37, 6
-; GFX900-NEXT: v_writelane_b32 v23, s46, 7
-; GFX900-NEXT: v_writelane_b32 v23, s47, 8
+; GFX900-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-NEXT: v_writelane_b32 v23, s39, 8
; GFX900-NEXT: v_writelane_b32 v23, s48, 9
; GFX900-NEXT: v_writelane_b32 v23, s49, 10
; GFX900-NEXT: v_writelane_b32 v23, s50, 11
; GFX900-NEXT: v_writelane_b32 v23, s51, 12
-; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-NEXT: v_writelane_b32 v23, s54, 15
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-NEXT: v_writelane_b32 v23, s55, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
@@ -196,14 +206,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-NEXT: v_readlane_b32 s54, v23, 15
; GFX900-NEXT: v_readlane_b32 s53, v23, 14
; GFX900-NEXT: v_readlane_b32 s52, v23, 13
; GFX900-NEXT: v_readlane_b32 s51, v23, 12
; GFX900-NEXT: v_readlane_b32 s50, v23, 11
; GFX900-NEXT: v_readlane_b32 s49, v23, 10
; GFX900-NEXT: v_readlane_b32 s48, v23, 9
-; GFX900-NEXT: v_readlane_b32 s47, v23, 8
-; GFX900-NEXT: v_readlane_b32 s46, v23, 7
+; GFX900-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-NEXT: v_readlane_b32 s38, v23, 7
; GFX900-NEXT: v_readlane_b32 s37, v23, 6
; GFX900-NEXT: v_readlane_b32 s36, v23, 5
; GFX900-NEXT: v_readlane_b32 s35, v23, 4
@@ -232,17 +244,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-NEXT: v_writelane_b32 v23, s35, 4
; GFX942-NEXT: v_writelane_b32 v23, s36, 5
; GFX942-NEXT: v_writelane_b32 v23, s37, 6
-; GFX942-NEXT: v_writelane_b32 v23, s46, 7
-; GFX942-NEXT: v_writelane_b32 v23, s47, 8
+; GFX942-NEXT: v_writelane_b32 v23, s38, 7
+; GFX942-NEXT: v_writelane_b32 v23, s39, 8
; GFX942-NEXT: v_writelane_b32 v23, s48, 9
; GFX942-NEXT: v_writelane_b32 v23, s49, 10
; GFX942-NEXT: v_writelane_b32 v23, s50, 11
; GFX942-NEXT: v_writelane_b32 v23, s51, 12
-; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_writelane_b32 v23, s52, 13
+; GFX942-NEXT: v_writelane_b32 v23, s53, 14
+; GFX942-NEXT: s_add_i32 s0, s32, 64
+; GFX942-NEXT: v_writelane_b32 v23, s54, 15
; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
-; GFX942-NEXT: v_writelane_b32 v23, s53, 14
+; GFX942-NEXT: v_writelane_b32 v23, s55, 16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
@@ -256,14 +270,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v23, 16
+; GFX942-NEXT: v_readlane_b32 s54, v23, 15
; GFX942-NEXT: v_readlane_b32 s53, v23, 14
; GFX942-NEXT: v_readlane_b32 s52, v23, 13
; GFX942-NEXT: v_readlane_b32 s51, v23, 12
; GFX942-NEXT: v_readlane_b32 s50, v23, 11
; GFX942-NEXT: v_readlane_b32 s49, v23, 10
; GFX942-NEXT: v_readlane_b32 s48, v23, 9
-; GFX942-NEXT: v_readlane_b32 s47, v23, 8
-; GFX942-NEXT: v_readlane_b32 s46, v23, 7
+; GFX942-NEXT: v_readlane_b32 s39, v23, 8
+; GFX942-NEXT: v_readlane_b32 s38, v23, 7
; GFX942-NEXT: v_readlane_b32 s37, v23, 6
; GFX942-NEXT: v_readlane_b32 s36, v23, 5
; GFX942-NEXT: v_readlane_b32 s35, v23, 4
@@ -299,14 +315,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4
; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5
; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6
-; GFX10_1-NEXT: v_writelane_b32 v23, s46, 7
-; GFX10_1-NEXT: v_writelane_b32 v23, s47, 8
+; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8
; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9
; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10
; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11
; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12
; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13
; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_1-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_1-NEXT: v_writelane_b32 v23, s55, 16
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_1-NEXT: ;;#ASMEND
@@ -316,14 +334,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15
; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14
; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13
; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12
; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11
; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10
; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9
-; GFX10_1-NEXT: v_readlane_b32 s47, v23, 8
-; GFX10_1-NEXT: v_readlane_b32 s46, v23, 7
+; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7
; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6
; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5
; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4
@@ -359,14 +379,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4
; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5
; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6
-; GFX10_3-NEXT: v_writelane_b32 v23, s46, 7
-; GFX10_3-NEXT: v_writelane_b32 v23, s47, 8
+; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8
; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9
; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10
; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11
; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12
; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13
; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_3-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_3-NEXT: v_writelane_b32 v23, s55, 16
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX10_3-NEXT: ;;#ASMEND
@@ -376,14 +398,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15
; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14
; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13
; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12
; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11
; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10
; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9
-; GFX10_3-NEXT: v_readlane_b32 s47, v23, 8
-; GFX10_3-NEXT: v_readlane_b32 s46, v23, 7
+; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7
; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6
; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5
; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4
@@ -419,14 +443,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX11-NEXT: v_writelane_b32 v23, s35, 4
; GFX11-NEXT: v_writelane_b32 v23, s36, 5
; GFX11-NEXT: v_writelane_b32 v23, s37, 6
-; GFX11-NEXT: v_writelane_b32 v23, s46, 7
-; GFX11-NEXT: v_writelane_b32 v23, s47, 8
+; GFX11-NEXT: v_writelane_b32 v23, s38, 7
+; GFX11-NEXT: v_writelane_b32 v23, s39, 8
; GFX11-NEXT: v_writelane_b32 v23, s48, 9
; GFX11-NEXT: v_writelane_b32 v23, s49, 10
; GFX11-NEXT: v_writelane_b32 v23, s50, 11
; GFX11-NEXT: v_writelane_b32 v23, s51, 12
; GFX11-NEXT: v_writelane_b32 v23, s52, 13
; GFX11-NEXT: v_writelane_b32 v23, s53, 14
+; GFX11-NEXT: v_writelane_b32 v23, s54, 15
+; GFX11-NEXT: v_writelane_b32 v23, s55, 16
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX11-NEXT: ;;#ASMEND
@@ -438,14 +464,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v23, 16
+; GFX11-NEXT: v_readlane_b32 s54, v23, 15
; GFX11-NEXT: v_readlane_b32 s53, v23, 14
; GFX11-NEXT: v_readlane_b32 s52, v23, 13
; GFX11-NEXT: v_readlane_b32 s51, v23, 12
; GFX11-NEXT: v_readlane_b32 s50, v23, 11
; GFX11-NEXT: v_readlane_b32 s49, v23, 10
; GFX11-NEXT: v_readlane_b32 s48, v23, 9
-; GFX11-NEXT: v_readlane_b32 s47, v23, 8
-; GFX11-NEXT: v_readlane_b32 s46, v23, 7
+; GFX11-NEXT: v_readlane_b32 s39, v23, 8
+; GFX11-NEXT: v_readlane_b32 s38, v23, 7
; GFX11-NEXT: v_readlane_b32 s37, v23, 6
; GFX11-NEXT: v_readlane_b32 s36, v23, 5
; GFX11-NEXT: v_readlane_b32 s35, v23, 4
@@ -483,14 +511,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: v_writelane_b32 v23, s35, 4
; GFX12-NEXT: v_writelane_b32 v23, s36, 5
; GFX12-NEXT: v_writelane_b32 v23, s37, 6
-; GFX12-NEXT: v_writelane_b32 v23, s46, 7
-; GFX12-NEXT: v_writelane_b32 v23, s47, 8
+; GFX12-NEXT: v_writelane_b32 v23, s38, 7
+; GFX12-NEXT: v_writelane_b32 v23, s39, 8
; GFX12-NEXT: v_writelane_b32 v23, s48, 9
; GFX12-NEXT: v_writelane_b32 v23, s49, 10
; GFX12-NEXT: v_writelane_b32 v23, s50, 11
; GFX12-NEXT: v_writelane_b32 v23, s51, 12
; GFX12-NEXT: v_writelane_b32 v23, s52, 13
; GFX12-NEXT: v_writelane_b32 v23, s53, 14
+; GFX12-NEXT: v_writelane_b32 v23, s54, 15
+; GFX12-NEXT: v_writelane_b32 v23, s55, 16
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
; GFX12-NEXT: ;;#ASMEND
@@ -503,14 +533,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
+; GFX12-NEXT: v_readlane_b32 s55, v23, 16
+; GFX12-NEXT: v_readlane_b32 s54, v23, 15
; GFX12-NEXT: v_readlane_b32 s53, v23, 14
; GFX12-NEXT: v_readlane_b32 s52, v23, 13
; GFX12-NEXT: v_readlane_b32 s51, v23, 12
; GFX12-NEXT: v_readlane_b32 s50, v23, 11
; GFX12-NEXT: v_readlane_b32 s49, v23, 10
; GFX12-NEXT: v_readlane_b32 s48, v23, 9
-; GFX12-NEXT: v_readlane_b32 s47, v23, 8
-; GFX12-NEXT: v_readlane_b32 s46, v23, 7
+; GFX12-NEXT: v_readlane_b32 s39, v23, 8
+; GFX12-NEXT: v_readlane_b32 s38, v23, 7
; GFX12-NEXT: v_readlane_b32 s37, v23, 6
; GFX12-NEXT: v_readlane_b32 s36, v23, 5
; GFX12-NEXT: v_readlane_b32 s35, v23, 4
@@ -581,15 +613,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX7-NEXT: v_writelane_b32 v21, s35, 4
; GFX7-NEXT: v_writelane_b32 v21, s36, 5
; GFX7-NEXT: v_writelane_b32 v21, s37, 6
-; GFX7-NEXT: v_writelane_b32 v21, s46, 7
-; GFX7-NEXT: v_writelane_b32 v21, s47, 8
+; GFX7-NEXT: v_writelane_b32 v21, s38, 7
+; GFX7-NEXT: v_writelane_b32 v21, s39, 8
; GFX7-NEXT: v_writelane_b32 v21, s48, 9
; GFX7-NEXT: v_writelane_b32 v21, s49, 10
; GFX7-NEXT: v_writelane_b32 v21, s50, 11
; GFX7-NEXT: v_writelane_b32 v21, s51, 12
; GFX7-NEXT: v_writelane_b32 v21, s52, 13
-; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
; GFX7-NEXT: v_writelane_b32 v21, s53, 14
+; GFX7-NEXT: v_writelane_b32 v21, s54, 15
+; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-NEXT: v_writelane_b32 v21, s55, 16
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX7-NEXT: ;;#ASMEND
@@ -599,14 +633,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
+; GFX7-NEXT: v_readlane_b32 s55, v21, 16
+; GFX7-NEXT: v_readlane_b32 s54, v21, 15
; GFX7-NEXT: v_readlane_b32 s53, v21, 14
; GFX7-NEXT: v_readlane_b32 s52, v21, 13
; GFX7-NEXT: v_readlane_b32 s51, v21, 12
; GFX7-NEXT: v_readlane_b32 s50, v21, 11
; GFX7-NEXT: v_readlane_b32 s49, v21, 10
; GFX7-NEXT: v_readlane_b32 s48, v21, 9
-; GFX7-NEXT: v_readlane_b32 s47, v21, 8
-; GFX7-NEXT: v_readlane_b32 s46, v21, 7
+; GFX7-NEXT: v_readlane_b32 s39, v21, 8
+; GFX7-NEXT: v_readlane_b32 s38, v21, 7
; GFX7-NEXT: v_readlane_b32 s37, v21, 6
; GFX7-NEXT: v_readlane_b32 s36, v21, 5
; GFX7-NEXT: v_readlane_b32 s35, v21, 4
@@ -635,15 +671,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX8-NEXT: v_writelane_b32 v21, s35, 4
; GFX8-NEXT: v_writelane_b32 v21, s36, 5
; GFX8-NEXT: v_writelane_b32 v21, s37, 6
-; GFX8-NEXT: v_writelane_b32 v21, s46, 7
-; GFX8-NEXT: v_writelane_b32 v21, s47, 8
+; GFX8-NEXT: v_writelane_b32 v21, s38, 7
+; GFX8-NEXT: v_writelane_b32 v21, s39, 8
; GFX8-NEXT: v_writelane_b32 v21, s48, 9
; GFX8-NEXT: v_writelane_b32 v21, s49, 10
; GFX8-NEXT: v_writelane_b32 v21, s50, 11
; GFX8-NEXT: v_writelane_b32 v21, s51, 12
; GFX8-NEXT: v_writelane_b32 v21, s52, 13
-; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
; GFX8-NEXT: v_writelane_b32 v21, s53, 14
+; GFX8-NEXT: v_writelane_b32 v21, s54, 15
+; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-NEXT: v_writelane_b32 v21, s55, 16
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX8-NEXT: ;;#ASMEND
@@ -653,14 +691,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v21, 16
+; GFX8-NEXT: v_readlane_b32 s54, v21, 15
; GFX8-NEXT: v_readlane_b32 s53, v21, 14
; GFX8-NEXT: v_readlane_b32 s52, v21, 13
; GFX8-NEXT: v_readlane_b32 s51, v21, 12
; GFX8-NEXT: v_readlane_b32 s50, v21, 11
; GFX8-NEXT: v_readlane_b32 s49, v21, 10
; GFX8-NEXT: v_readlane_b32 s48, v21, 9
-; GFX8-NEXT: v_readlane_b32 s47, v21, 8
-; GFX8-NEXT: v_readlane_b32 s46, v21, 7
+; GFX8-NEXT: v_readlane_b32 s39, v21, 8
+; GFX8-NEXT: v_readlane_b32 s38, v21, 7
; GFX8-NEXT: v_readlane_b32 s37, v21, 6
; GFX8-NEXT: v_readlane_b32 s36, v21, 5
; GFX8-NEXT: v_readlane_b32 s35, v21, 4
@@ -689,15 +729,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX900-NEXT: v_writelane_b32 v21, s35, 4
; GFX900-NEXT: v_writelane_b32 v21, s36, 5
; GFX900-NEXT: v_writelane_b32 v21, s37, 6
-; GFX900-NEXT: v_writelane_b32 v21, s46, 7
-; GFX900-NEXT: v_writelane_b32 v21, s47, 8
+; GFX900-NEXT: v_writelane_b32 v21, s38, 7
+; GFX900-NEXT: v_writelane_b32 v21, s39, 8
; GFX900-NEXT: v_writelane_b32 v21, s48, 9
; GFX900-NEXT: v_writelane_b32 v21, s49, 10
; GFX900-NEXT: v_writelane_b32 v21, s50, 11
; GFX900-NEXT: v_writelane_b32 v21, s51, 12
; GFX900-NEXT: v_writelane_b32 v21, s52, 13
-; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
; GFX900-NEXT: v_writelane_b32 v21, s53, 14
+; GFX900-NEXT: v_writelane_b32 v21, s54, 15
+; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-NEXT: v_writelane_b32 v21, s55, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX900-NEXT: ;;#ASMEND
@@ -707,14 +749,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v21, 16
+; GFX900-NEXT: v_readlane_b32 s54, v21, 15
; GFX900-NEXT: v_readlane_b32 s53, v21, 14
; GFX900-NEXT: v_readlane_b32 s52, v21, 13
; GFX900-NEXT: v_readlane_b32 s51, v21, 12
; GFX900-NEXT: v_readlane_b32 s50, v21, 11
; GFX900-NEXT: v_readlane_b32 s49, v21, 10
; GFX900-NEXT: v_readlane_b32 s48, v21, 9
-; GFX900-NEXT: v_readlane_b32 s47, v21, 8
-; GFX900-NEXT: v_readlane_b32 s46, v21, 7
+; GFX900-NEXT: v_readlane_b32 s39, v21, 8
+; GFX900-NEXT: v_readlane_b32 s38, v21, 7
; GFX900-NEXT: v_readlane_b32 s37, v21, 6
; GFX900-NEXT: v_readlane_b32 s36, v21, 5
; GFX900-NEXT: v_readlane_b32 s35, v21, 4
@@ -743,15 +787,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-NEXT: v_writelane_b32 v21, s35, 4
; GFX942-NEXT: v_writelane_b32 v21, s36, 5
; GFX942-NEXT: v_writelane_b32 v21, s37, 6
-; GFX942-NEXT: v_writelane_b32 v21, s46, 7
-; GFX942-NEXT: v_writelane_b32 v21, s47, 8
+; GFX942-NEXT: v_writelane_b32 v21, s38, 7
+; GFX942-NEXT: v_writelane_b32 v21, s39, 8
; GFX942-NEXT: v_writelane_b32 v21, s48, 9
; GFX942-NEXT: v_writelane_b32 v21, s49, 10
; GFX942-NEXT: v_writelane_b32 v21, s50, 11
; GFX942-NEXT: v_writelane_b32 v21, s51, 12
; GFX942-NEXT: v_writelane_b32 v21, s52, 13
-; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-NEXT: v_writelane_b32 v21, s53, 14
+; GFX942-NEXT: v_writelane_b32 v21, s54, 15
+; GFX942-NEXT: s_and_b64 s[60:61], 0, exec
+; GFX942-NEXT: v_writelane_b32 v21, s55, 16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX942-NEXT: ;;#ASMEND
@@ -762,14 +808,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v21, 16
+; GFX942-NEXT: v_readlane_b32 s54, v21, 15
; GFX942-NEXT: v_readlane_b32 s53, v21, 14
; GFX942-NEXT: v_readlane_b32 s52, v21, 13
; GFX942-NEXT: v_readlane_b32 s51, v21, 12
; GFX942-NEXT: v_readlane_b32 s50, v21, 11
; GFX942-NEXT: v_readlane_b32 s49, v21, 10
; GFX942-NEXT: v_readlane_b32 s48, v21, 9
-; GFX942-NEXT: v_readlane_b32 s47, v21, 8
-; GFX942-NEXT: v_readlane_b32 s46, v21, 7
+; GFX942-NEXT: v_readlane_b32 s39, v21, 8
+; GFX942-NEXT: v_readlane_b32 s38, v21, 7
; GFX942-NEXT: v_readlane_b32 s37, v21, 6
; GFX942-NEXT: v_readlane_b32 s36, v21, 5
; GFX942-NEXT: v_readlane_b32 s35, v21, 4
@@ -800,14 +848,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4
; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5
; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6
-; GFX10_1-NEXT: v_writelane_b32 v21, s46, 7
-; GFX10_1-NEXT: v_writelane_b32 v21, s47, 8
+; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8
; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9
; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10
; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11
; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12
; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13
; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_1-NEXT: ;;#ASMEND
@@ -817,14 +867,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15
; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14
; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13
; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12
; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11
; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10
; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9
-; GFX10_1-NEXT: v_readlane_b32 s47, v21, 8
-; GFX10_1-NEXT: v_readlane_b32 s46, v21, 7
+; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7
; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6
; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5
; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4
@@ -855,14 +907,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4
; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5
; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6
-; GFX10_3-NEXT: v_writelane_b32 v21, s46, 7
-; GFX10_3-NEXT: v_writelane_b32 v21, s47, 8
+; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8
; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9
; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10
; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11
; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12
; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13
; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX10_3-NEXT: ;;#ASMEND
@@ -872,14 +926,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15
; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14
; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13
; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12
; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11
; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10
; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9
-; GFX10_3-NEXT: v_readlane_b32 s47, v21, 8
-; GFX10_3-NEXT: v_readlane_b32 s46, v21, 7
+; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7
; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6
; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5
; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4
@@ -909,14 +965,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX11-NEXT: v_writelane_b32 v21, s35, 4
; GFX11-NEXT: v_writelane_b32 v21, s36, 5
; GFX11-NEXT: v_writelane_b32 v21, s37, 6
-; GFX11-NEXT: v_writelane_b32 v21, s46, 7
-; GFX11-NEXT: v_writelane_b32 v21, s47, 8
+; GFX11-NEXT: v_writelane_b32 v21, s38, 7
+; GFX11-NEXT: v_writelane_b32 v21, s39, 8
; GFX11-NEXT: v_writelane_b32 v21, s48, 9
; GFX11-NEXT: v_writelane_b32 v21, s49, 10
; GFX11-NEXT: v_writelane_b32 v21, s50, 11
; GFX11-NEXT: v_writelane_b32 v21, s51, 12
; GFX11-NEXT: v_writelane_b32 v21, s52, 13
; GFX11-NEXT: v_writelane_b32 v21, s53, 14
+; GFX11-NEXT: v_writelane_b32 v21, s54, 15
+; GFX11-NEXT: v_writelane_b32 v21, s55, 16
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX11-NEXT: ;;#ASMEND
@@ -928,14 +986,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: v_readlane_b32 s55, v21, 16
+; GFX11-NEXT: v_readlane_b32 s54, v21, 15
; GFX11-NEXT: v_readlane_b32 s53, v21, 14
; GFX11-NEXT: v_readlane_b32 s52, v21, 13
; GFX11-NEXT: v_readlane_b32 s51, v21, 12
; GFX11-NEXT: v_readlane_b32 s50, v21, 11
; GFX11-NEXT: v_readlane_b32 s49, v21, 10
; GFX11-NEXT: v_readlane_b32 s48, v21, 9
-; GFX11-NEXT: v_readlane_b32 s47, v21, 8
-; GFX11-NEXT: v_readlane_b32 s46, v21, 7
+; GFX11-NEXT: v_readlane_b32 s39, v21, 8
+; GFX11-NEXT: v_readlane_b32 s38, v21, 7
; GFX11-NEXT: v_readlane_b32 s37, v21, 6
; GFX11-NEXT: v_readlane_b32 s36, v21, 5
; GFX11-NEXT: v_readlane_b32 s35, v21, 4
@@ -969,14 +1029,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: v_writelane_b32 v21, s35, 4
; GFX12-NEXT: v_writelane_b32 v21, s36, 5
; GFX12-NEXT: v_writelane_b32 v21, s37, 6
-; GFX12-NEXT: v_writelane_b32 v21, s46, 7
-; GFX12-NEXT: v_writelane_b32 v21, s47, 8
+; GFX12-NEXT: v_writelane_b32 v21, s38, 7
+; GFX12-NEXT: v_writelane_b32 v21, s39, 8
; GFX12-NEXT: v_writelane_b32 v21, s48, 9
; GFX12-NEXT: v_writelane_b32 v21, s49, 10
; GFX12-NEXT: v_writelane_b32 v21, s50, 11
; GFX12-NEXT: v_writelane_b32 v21, s51, 12
; GFX12-NEXT: v_writelane_b32 v21, s52, 13
; GFX12-NEXT: v_writelane_b32 v21, s53, 14
+; GFX12-NEXT: v_writelane_b32 v21, s54, 15
+; GFX12-NEXT: v_writelane_b32 v21, s55, 16
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
; GFX12-NEXT: ;;#ASMEND
@@ -985,14 +1047,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_readlane_b32 s55, v21, 16
+; GFX12-NEXT: v_readlane_b32 s54, v21, 15
; GFX12-NEXT: v_readlane_b32 s53, v21, 14
; GFX12-NEXT: v_readlane_b32 s52, v21, 13
; GFX12-NEXT: v_readlane_b32 s51, v21, 12
; GFX12-NEXT: v_readlane_b32 s50, v21, 11
; GFX12-NEXT: v_readlane_b32 s49, v21, 10
; GFX12-NEXT: v_readlane_b32 s48, v21, 9
-; GFX12-NEXT: v_readlane_b32 s47, v21, 8
-; GFX12-NEXT: v_readlane_b32 s46, v21, 7
+; GFX12-NEXT: v_readlane_b32 s39, v21, 8
+; GFX12-NEXT: v_readlane_b32 s38, v21, 7
; GFX12-NEXT: v_readlane_b32 s37, v21, 6
; GFX12-NEXT: v_readlane_b32 s36, v21, 5
; GFX12-NEXT: v_readlane_b32 s35, v21, 4
@@ -1055,8 +1119,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: s_add_i32 s6, s32, 0x201100
; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
-; GFX7-NEXT: v_writelane_b32 v23, s28, 15
-; GFX7-NEXT: v_writelane_b32 v23, s29, 16
+; GFX7-NEXT: v_writelane_b32 v23, s28, 17
+; GFX7-NEXT: v_writelane_b32 v23, s29, 18
; GFX7-NEXT: v_writelane_b32 v23, s30, 0
; GFX7-NEXT: v_writelane_b32 v23, s31, 1
; GFX7-NEXT: v_writelane_b32 v23, s33, 2
@@ -1064,21 +1128,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: v_writelane_b32 v23, s35, 4
; GFX7-NEXT: v_writelane_b32 v23, s36, 5
; GFX7-NEXT: v_writelane_b32 v23, s37, 6
-; GFX7-NEXT: v_writelane_b32 v23, s46, 7
-; GFX7-NEXT: v_writelane_b32 v23, s47, 8
+; GFX7-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-NEXT: v_writelane_b32 v23, s39, 8
; GFX7-NEXT: v_writelane_b32 v23, s48, 9
; GFX7-NEXT: v_writelane_b32 v23, s49, 10
; GFX7-NEXT: v_writelane_b32 v23, s50, 11
-; GFX7-NEXT: s_lshr_b32 s5, s32, 6
; GFX7-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-NEXT: s_lshr_b32 s5, s32, 6
+; GFX7-NEXT: v_writelane_b32 v23, s53, 14
; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
; GFX7-NEXT: s_add_i32 s4, s5, 0x4240
; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
-; GFX7-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-NEXT: v_writelane_b32 v23, s54, 15
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0
; GFX7-NEXT: v_writelane_b32 v22, s4, 0
; GFX7-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX7-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-NEXT: v_writelane_b32 v23, s55, 16
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use alloca0 v0
; GFX7-NEXT: ;;#ASMEND
@@ -1089,14 +1155,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX7-NEXT: ;;#ASMEND
+; GFX7-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-NEXT: v_readlane_b32 s54, v23, 15
; GFX7-NEXT: v_readlane_b32 s53, v23, 14
; GFX7-NEXT: v_readlane_b32 s52, v23, 13
; GFX7-NEXT: v_readlane_b32 s51, v23, 12
; GFX7-NEXT: v_readlane_b32 s50, v23, 11
; GFX7-NEXT: v_readlane_b32 s49, v23, 10
; GFX7-NEXT: v_readlane_b32 s48, v23, 9
-; GFX7-NEXT: v_readlane_b32 s47, v23, 8
-; GFX7-NEXT: v_readlane_b32 s46, v23, 7
+; GFX7-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-NEXT: v_readlane_b32 s38, v23, 7
; GFX7-NEXT: v_readlane_b32 s37, v23, 6
; GFX7-NEXT: v_readlane_b32 s36, v23, 5
; GFX7-NEXT: v_readlane_b32 s35, v23, 4
@@ -1104,8 +1172,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: v_readlane_b32 s33, v23, 2
; GFX7-NEXT: v_readlane_b32 s31, v23, 1
; GFX7-NEXT: v_readlane_b32 s30, v23, 0
-; GFX7-NEXT: v_readlane_b32 s28, v23, 15
-; GFX7-NEXT: v_readlane_b32 s29, v23, 16
+; GFX7-NEXT: v_readlane_b32 s28, v23, 17
+; GFX7-NEXT: v_readlane_b32 s29, v23, 18
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
@@ -1129,19 +1197,21 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: v_writelane_b32 v22, s35, 4
; GFX8-NEXT: v_writelane_b32 v22, s36, 5
; GFX8-NEXT: v_writelane_b32 v22, s37, 6
-; GFX8-NEXT: v_writelane_b32 v22, s46, 7
-; GFX8-NEXT: v_writelane_b32 v22, s47, 8
+; GFX8-NEXT: v_writelane_b32 v22, s38, 7
+; GFX8-NEXT: v_writelane_b32 v22, s39, 8
; GFX8-NEXT: v_writelane_b32 v22, s48, 9
; GFX8-NEXT: v_writelane_b32 v22, s49, 10
; GFX8-NEXT: v_writelane_b32 v22, s50, 11
-; GFX8-NEXT: s_lshr_b32 s4, s32, 6
; GFX8-NEXT: v_writelane_b32 v22, s51, 12
+; GFX8-NEXT: v_writelane_b32 v22, s52, 13
+; GFX8-NEXT: s_lshr_b32 s4, s32, 6
+; GFX8-NEXT: v_writelane_b32 v22, s53, 14
; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: s_add_i32 s59, s4, 0x4240
-; GFX8-NEXT: v_writelane_b32 v22, s52, 13
+; GFX8-NEXT: v_writelane_b32 v22, s54, 15
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0
; GFX8-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX8-NEXT: v_writelane_b32 v22, s53, 14
+; GFX8-NEXT: v_writelane_b32 v22, s55, 16
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use alloca0 v0
; GFX8-NEXT: ;;#ASMEND
@@ -1151,14 +1221,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_readlane_b32 s55, v22, 16
+; GFX8-NEXT: v_readlane_b32 s54, v22, 15
; GFX8-NEXT: v_readlane_b32 s53, v22, 14
; GFX8-NEXT: v_readlane_b32 s52, v22, 13
; GFX8-NEXT: v_readlane_b32 s51, v22, 12
; GFX8-NEXT: v_readlane_b32 s50, v22, 11
; GFX8-NEXT: v_readlane_b32 s49, v22, 10
; GFX8-NEXT: v_readlane_b32 s48, v22, 9
-; GFX8-NEXT: v_readlane_b32 s47, v22, 8
-; GFX8-NEXT: v_readlane_b32 s46, v22, 7
+; GFX8-NEXT: v_readlane_b32 s39, v22, 8
+; GFX8-NEXT: v_readlane_b32 s38, v22, 7
; GFX8-NEXT: v_readlane_b32 s37, v22, 6
; GFX8-NEXT: v_readlane_b32 s36, v22, 5
; GFX8-NEXT: v_readlane_b32 s35, v22, 4
@@ -1187,19 +1259,21 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX900-NEXT: v_writelane_b32 v22, s35, 4
; GFX900-NEXT: v_writelane_b32 v22, s36, 5
; GFX900-NEXT: v_writelane_b32 v22, s37, 6
-; GFX900-NEXT: v_writelane_b32 v22, s46, 7
-; GFX900-NEXT: v_writelane_b32 v22, s47, 8
+; GFX900-NEXT: v_writelane_b32 v22, s38, 7
+; GFX900-NEXT: v_writelane_b32 v22, s39, 8
; GFX900-NEXT: v_writelane_b32 v22, s48, 9
; GFX900-NEXT: v_writelane_b32 v22, s49, 10
; GFX900-NEXT: v_writelane_b32 v22, s50, 11
-; GFX900-NEXT: s_lshr_b32 s4, s32, 6
; GFX900-NEXT: v_writelane_b32 v22, s51, 12
+; GFX900-NEXT: v_writelane_b32 v22, s52, 13
+; GFX900-NEXT: s_lshr_b32 s4, s32, 6
+; GFX900-NEXT: v_writelane_b32 v22, s53, 14
; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX900-NEXT: s_add_i32 s59, s4, 0x4240
-; GFX900-NEXT: v_writelane_b32 v22, s52, 13
+; GFX900-NEXT: v_writelane_b32 v22, s54, 15
; GFX900-NEXT: v_add_u32_e32 v0, 64, v0
; GFX900-NEXT: s_and_b64 s[4:5], 0, exec
-; GFX900-NEXT: v_writelane_b32 v22, s53, 14
+; GFX900-NEXT: v_writelane_b32 v22, s55, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use alloca0 v0
; GFX900-NEXT: ;;#ASMEND
@@ -1209,14 +1283,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_readlane_b32 s55, v22, 16
+; GFX900-NEXT: v_readlane_b32 s54, v22, 15
; GFX900-NEXT: v_readlane_b32 s53, v22, 14
; GFX900-NEXT: v_readlane_b32 s52, v22, 13
; GFX900-NEXT: v_readlane_b32 s51, v22, 12
; GFX900-NEXT: v_readlane_b32 s50, v22, 11
; GFX900-NEXT: v_readlane_b32 s49, v22, 10
; GFX900-NEXT: v_readlane_b32 s48, v22, 9
-; GFX900-NEXT: v_readlane_b32 s47, v22, 8
-; GFX900-NEXT: v_readlane_b32 s46, v22, 7
+; GFX900-NEXT: v_readlane_b32 s39, v22, 8
+; GFX900-NEXT: v_readlane_b32 s38, v22, 7
; GFX900-NEXT: v_readlane_b32 s37, v22, 6
; GFX900-NEXT: v_readlane_b32 s36, v22, 5
; GFX900-NEXT: v_readlane_b32 s35, v22, 4
@@ -1245,16 +1321,18 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-NEXT: v_writelane_b32 v22, s35, 4
; GFX942-NEXT: v_writelane_b32 v22, s36, 5
; GFX942-NEXT: v_writelane_b32 v22, s37, 6
-; GFX942-NEXT: v_writelane_b32 v22, s46, 7
-; GFX942-NEXT: v_writelane_b32 v22, s47, 8
+; GFX942-NEXT: v_writelane_b32 v22, s38, 7
+; GFX942-NEXT: v_writelane_b32 v22, s39, 8
; GFX942-NEXT: v_writelane_b32 v22, s48, 9
; GFX942-NEXT: v_writelane_b32 v22, s49, 10
; GFX942-NEXT: v_writelane_b32 v22, s50, 11
; GFX942-NEXT: v_writelane_b32 v22, s51, 12
-; GFX942-NEXT: s_add_i32 s0, s32, 64
; GFX942-NEXT: v_writelane_b32 v22, s52, 13
-; GFX942-NEXT: v_mov_b32_e32 v0, s0
; GFX942-NEXT: v_writelane_b32 v22, s53, 14
+; GFX942-NEXT: s_add_i32 s0, s32, 64
+; GFX942-NEXT: v_writelane_b32 v22, s54, 15
+; GFX942-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-NEXT: v_writelane_b32 v22, s55, 16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use alloca0 v0
; GFX942-NEXT: ;;#ASMEND
@@ -1266,14 +1344,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s55, v22, 16
+; GFX942-NEXT: v_readlane_b32 s54, v22, 15
; GFX942-NEXT: v_readlane_b32 s53, v22, 14
; GFX942-NEXT: v_readlane_b32 s52, v22, 13
; GFX942-NEXT: v_readlane_b32 s51, v22, 12
; GFX942-NEXT: v_readlane_b32 s50, v22, 11
; GFX942-NEXT: v_readlane_b32 s49, v22, 10
; GFX942-NEXT: v_readlane_b32 s48, v22, 9
-; GFX942-NEXT: v_readlane_b32 s47, v22, 8
-; GFX942-NEXT: v_readlane_b32 s46, v22, 7
+; GFX942-NEXT: v_readlane_b32 s39, v22, 8
+; GFX942-NEXT: v_readlane_b32 s38, v22, 7
; GFX942-NEXT: v_readlane_b32 s37, v22, 6
; GFX942-NEXT: v_readlane_b32 s36, v22, 5
; GFX942-NEXT: v_readlane_b32 s35, v22, 4
@@ -1311,28 +1391,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4
; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5
; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6
-; GFX10_1-NEXT: v_writelane_b32 v22, s46, 7
-; GFX10_1-NEXT: v_writelane_b32 v22, s47, 8
+; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8
; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9
; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10
; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11
; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12
; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13
; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: ;;#ASMSTART
; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX10_1-NEXT: ;;#ASMEND
+; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15
; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14
; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13
; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12
; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11
; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10
; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9
-; GFX10_1-NEXT: v_readlane_b32 s47, v22, 8
-; GFX10_1-NEXT: v_readlane_b32 s46, v22, 7
+; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7
; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6
; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5
; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4
@@ -1370,28 +1454,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4
; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5
; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6
-; GFX10_3-NEXT: v_writelane_b32 v22, s46, 7
-; GFX10_3-NEXT: v_writelane_b32 v22, s47, 8
+; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8
; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9
; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10
; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11
; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12
; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13
; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: ;;#ASMSTART
; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX10_3-NEXT: ;;#ASMEND
+; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15
; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14
; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13
; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12
; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11
; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10
; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9
-; GFX10_3-NEXT: v_readlane_b32 s47, v22, 8
-; GFX10_3-NEXT: v_readlane_b32 s46, v22, 7
+; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7
; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6
; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5
; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4
@@ -1427,14 +1515,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: v_writelane_b32 v22, s35, 4
; GFX11-NEXT: v_writelane_b32 v22, s36, 5
; GFX11-NEXT: v_writelane_b32 v22, s37, 6
-; GFX11-NEXT: v_writelane_b32 v22, s46, 7
-; GFX11-NEXT: v_writelane_b32 v22, s47, 8
+; GFX11-NEXT: v_writelane_b32 v22, s38, 7
+; GFX11-NEXT: v_writelane_b32 v22, s39, 8
; GFX11-NEXT: v_writelane_b32 v22, s48, 9
; GFX11-NEXT: v_writelane_b32 v22, s49, 10
; GFX11-NEXT: v_writelane_b32 v22, s50, 11
; GFX11-NEXT: v_writelane_b32 v22, s51, 12
; GFX11-NEXT: v_writelane_b32 v22, s52, 13
; GFX11-NEXT: v_writelane_b32 v22, s53, 14
+; GFX11-NEXT: v_writelane_b32 v22, s54, 15
+; GFX11-NEXT: v_writelane_b32 v22, s55, 16
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX11-NEXT: ;;#ASMEND
@@ -1442,14 +1532,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_readlane_b32 s55, v22, 16
+; GFX11-NEXT: v_readlane_b32 s54, v22, 15
; GFX11-NEXT: v_readlane_b32 s53, v22, 14
; GFX11-NEXT: v_readlane_b32 s52, v22, 13
; GFX11-NEXT: v_readlane_b32 s51, v22, 12
; GFX11-NEXT: v_readlane_b32 s50, v22, 11
; GFX11-NEXT: v_readlane_b32 s49, v22, 10
; GFX11-NEXT: v_readlane_b32 s48, v22, 9
-; GFX11-NEXT: v_readlane_b32 s47, v22, 8
-; GFX11-NEXT: v_readlane_b32 s46, v22, 7
+; GFX11-NEXT: v_readlane_b32 s39, v22, 8
+; GFX11-NEXT: v_readlane_b32 s38, v22, 7
; GFX11-NEXT: v_readlane_b32 s37, v22, 6
; GFX11-NEXT: v_readlane_b32 s36, v22, 5
; GFX11-NEXT: v_readlane_b32 s35, v22, 4
@@ -1488,14 +1580,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: v_writelane_b32 v22, s35, 4
; GFX12-NEXT: v_writelane_b32 v22, s36, 5
; GFX12-NEXT: v_writelane_b32 v22, s37, 6
-; GFX12-NEXT: v_writelane_b32 v22, s46, 7
-; GFX12-NEXT: v_writelane_b32 v22, s47, 8
+; GFX12-NEXT: v_writelane_b32 v22, s38, 7
+; GFX12-NEXT: v_writelane_b32 v22, s39, 8
; GFX12-NEXT: v_writelane_b32 v22, s48, 9
; GFX12-NEXT: v_writelane_b32 v22, s49, 10
; GFX12-NEXT: v_writelane_b32 v22, s50, 11
; GFX12-NEXT: v_writelane_b32 v22, s51, 12
; GFX12-NEXT: v_writelane_b32 v22, s52, 13
; GFX12-NEXT: v_writelane_b32 v22, s53, 14
+; GFX12-NEXT: v_writelane_b32 v22, s54, 15
+; GFX12-NEXT: v_writelane_b32 v22, s55, 16
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX12-NEXT: ;;#ASMEND
@@ -1503,14 +1597,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_readlane_b32 s55, v22, 16
+; GFX12-NEXT: v_readlane_b32 s54, v22, 15
; GFX12-NEXT: v_readlane_b32 s53, v22, 14
; GFX12-NEXT: v_readlane_b32 s52, v22, 13
; GFX12-NEXT: v_readlane_b32 s51, v22, 12
; GFX12-NEXT: v_readlane_b32 s50, v22, 11
; GFX12-NEXT: v_readlane_b32 s49, v22, 10
; GFX12-NEXT: v_readlane_b32 s48, v22, 9
-; GFX12-NEXT: v_readlane_b32 s47, v22, 8
-; GFX12-NEXT: v_readlane_b32 s46, v22, 7
+; GFX12-NEXT: v_readlane_b32 s39, v22, 8
+; GFX12-NEXT: v_readlane_b32 s38, v22, 7
; GFX12-NEXT: v_readlane_b32 s37, v22, 6
; GFX12-NEXT: v_readlane_b32 s36, v22, 5
; GFX12-NEXT: v_readlane_b32 s35, v22, 4
diff --git a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll
index 790b934c2b1bf..52f380b7f80a3 100644
--- a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll
+++ b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll
@@ -8,7 +8,7 @@
; CHECK-LABEL: I_Quit:
; CHECK: .set I_Quit.num_vgpr, max(41, amdgpu.max_num_vgpr)
; CHECK: .set I_Quit.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: .set I_Quit.numbered_sgpr, max(64, amdgpu.max_num_sgpr)
+; CHECK: .set I_Quit.numbered_sgpr, max(56, amdgpu.max_num_sgpr)
; CHECK: .set I_Quit.private_seg_size, 16
; CHECK: .set I_Quit.uses_vcc, 1
; CHECK: .set I_Quit.uses_flat_scratch, 1
@@ -80,7 +80,7 @@ define void @P_SetThingPosition() {
; CHECK-LABEL: P_SetupPsprites:
; CHECK: .set P_SetupPsprites.num_vgpr, max(41, amdgpu.max_num_vgpr)
; CHECK: .set P_SetupPsprites.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: .set P_SetupPsprites.numbered_sgpr, max(64, amdgpu.max_num_sgpr)
+; CHECK: .set P_SetupPsprites.numbered_sgpr, max(56, amdgpu.max_num_sgpr)
; CHECK: .set P_SetupPsprites.private_seg_size, 16
; CHECK: .set P_SetupPsprites.uses_vcc, 1
; CHECK: .set P_SetupPsprites.uses_flat_scratch, 1
@@ -128,7 +128,7 @@ define void @P_SpawnPlayer() {
; CHECK-LABEL: I_Error:
; CHECK: .set I_Error.num_vgpr, max(41, amdgpu.max_num_vgpr)
; CHECK: .set I_Error.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: .set I_Error.numbered_sgpr, max(64, amdgpu.max_num_sgpr)
+; CHECK: .set I_Error.numbered_sgpr, max(56, amdgpu.max_num_sgpr)
; CHECK: .set I_Error.private_seg_size, 16
; CHECK: .set I_Error.uses_vcc, 1
; CHECK: .set I_Error.uses_flat_scratch, 1
@@ -264,7 +264,7 @@ define ptr @P_SaveGameFile() {
; CHECK-LABEL: R_FlatNumForName:
; CHECK: .set R_FlatNumForName.num_vgpr, max(42, I_Error.num_vgpr)
; CHECK: .set R_FlatNumForName.num_agpr, max(0, I_Error.num_agpr)
-; CHECK: .set R_FlatNumForName.numbered_sgpr, max(64, I_Error.numbered_sgpr)
+; CHECK: .set R_FlatNumForName.numbered_sgpr, max(56, I_Error.numbered_sgpr)
; CHECK: .set R_FlatNumForName.private_seg_size, 16+(max(I_Error.private_seg_size))
; CHECK: .set R_FlatNumForName.uses_vcc, or(1, I_Error.uses_vcc)
; CHECK: .set R_FlatNumForName.uses_flat_scratch, or(0, I_Error.uses_flat_scratch)
@@ -279,7 +279,7 @@ define i32 @R_FlatNumForName() {
; CHECK-LABEL: R_TextureNumForName:
; CHECK: .set R_TextureNumForName.num_vgpr, max(42, R_FlatNumForName.num_vgpr)
; CHECK: .set R_TextureNumForName.num_agpr, max(0, R_FlatNumForName.num_agpr)
-; CHECK: .set R_TextureNumForName.numbered_sgpr, max(64, R_FlatNumForName.numbered_sgpr)
+; CHECK: .set R_TextureNumForName.numbered_sgpr, max(56, R_FlatNumForName.numbered_sgpr)
; CHECK: .set R_TextureNumForName.private_seg_size, 16+(max(R_FlatNumForName.private_seg_size))
; CHECK: .set R_TextureNumForName.uses_vcc, or(1, R_FlatNumForName.uses_vcc)
; CHECK: .set R_TextureNumForName.uses_flat_scratch, or(0, R_FlatNumForName.uses_flat_scratch)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
index 8e957c1c31013..05cbd4c2a010d 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
@@ -27,25 +27,25 @@ body: |
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
- ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1
+ ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33
+ ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34
+ ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192
- ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr42 = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr0, 0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384
- ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr42 = S_MOV_B32 16384
+ ; CHECK-NEXT: $vgpr2, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr2, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
- ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
+ ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41
+ ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -75,24 +75,24 @@ body: |
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
- ; CHECK: liveins: $sgpr29, $sgpr38, $vgpr1
+ ; CHECK: liveins: $sgpr29, $sgpr40, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr34
+ ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192
- ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr42 = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr0, 0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384
- ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr42 = S_MOV_B32 16384
+ ; CHECK-NEXT: $vgpr2, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr2, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr38
+ ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr40
; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@@ -133,11 +133,11 @@ body: |
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr38 = S_MOV_B32 8192
- ; CHECK-NEXT: $vgpr0, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr38 = S_MOV_B32 16384
- ; CHECK-NEXT: $vgpr2, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384
+ ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
index 88556040486e2..4f1c9a20fddc3 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir
@@ -23,12 +23,12 @@ body: |
liveins: $vgpr1
; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs
- ; MUBUF: liveins: $sgpr38, $sgpr39, $vgpr1
+ ; MUBUF: liveins: $sgpr40, $sgpr41, $vgpr1
; MUBUF-NEXT: {{ $}}
- ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33
+ ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34
+ ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34
; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
@@ -38,27 +38,27 @@ body: |
; MUBUF-NEXT: $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
- ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
+ ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41
+ ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40
; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc
;
; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs
- ; FLATSCR: liveins: $sgpr38, $sgpr39, $vgpr1
+ ; FLATSCR: liveins: $sgpr40, $sgpr41, $vgpr1
; FLATSCR-NEXT: {{ $}}
- ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33
+ ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
- ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34
+ ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34
; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
- ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec
- ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
- ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; FLATSCR-NEXT: $sgpr42 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
+ ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr42, implicit $exec
+ ; FLATSCR-NEXT: $sgpr42 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
+ ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr42, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
- ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
+ ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41
+ ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40
; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
index 1242e23db6c6a..480859a09a347 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir
@@ -22,22 +22,22 @@ body: |
liveins: $vgpr1
; CHECK-LABEL: name: scavenge_sgpr_pei
- ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1
+ ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33
+ ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34
+ ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34
; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $sgpr40 = S_MOV_B32 4096
- ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec
+ ; CHECK-NEXT: $sgpr42 = S_MOV_B32 4096
+ ; CHECK-NEXT: $vgpr2, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr2, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
- ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
+ ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41
+ ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_OR_B32_e32 %stack.0, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index d5f3be9a515a2..0eb186f5e3d87 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -10,20 +10,20 @@ declare i64 @_Z13get_global_idj(i32) #0
define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX8-LABEL: clmem_read_simplified:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -91,20 +91,20 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: clmem_read_simplified:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -161,12 +161,12 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: clmem_read_simplified:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -174,8 +174,8 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -342,20 +342,20 @@ entry:
define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX8-LABEL: clmem_read:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -469,20 +469,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
;
; GFX900-LABEL: clmem_read:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX900-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX900-NEXT: s_mov_b32 s50, -1
-; GFX900-NEXT: s_mov_b32 s51, 0xe00000
-; GFX900-NEXT: s_add_u32 s48, s48, s11
-; GFX900-NEXT: s_addc_u32 s49, s49, 0
+; GFX900-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX900-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX900-NEXT: s_mov_b32 s38, -1
+; GFX900-NEXT: s_mov_b32 s39, 0xe00000
+; GFX900-NEXT: s_add_u32 s36, s36, s11
+; GFX900-NEXT: s_addc_u32 s37, s37, 0
; GFX900-NEXT: s_getpc_b64 s[0:1]
; GFX900-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX900-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX900-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX900-NEXT: v_mov_b32_e32 v31, v0
; GFX900-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX900-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX900-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX900-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: s_mov_b32 s32, 0
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
@@ -586,12 +586,12 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: clmem_read:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -599,8 +599,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -698,20 +698,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
;
; GFX90A-LABEL: clmem_read:
; GFX90A: ; %bb.0: ; %entry
-; GFX90A-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX90A-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX90A-NEXT: s_mov_b32 s50, -1
-; GFX90A-NEXT: s_mov_b32 s51, 0xe00000
-; GFX90A-NEXT: s_add_u32 s48, s48, s11
-; GFX90A-NEXT: s_addc_u32 s49, s49, 0
+; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX90A-NEXT: s_mov_b32 s38, -1
+; GFX90A-NEXT: s_mov_b32 s39, 0xe00000
+; GFX90A-NEXT: s_add_u32 s36, s36, s11
+; GFX90A-NEXT: s_addc_u32 s37, s37, 0
; GFX90A-NEXT: s_getpc_b64 s[0:1]
; GFX90A-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX90A-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX90A-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX90A-NEXT: v_mov_b32_e32 v31, v0
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX90A-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX90A-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: s_mov_b32 s32, 0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
@@ -1030,20 +1030,20 @@ while.end: ; preds = %while.cond.loopexit
define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX8-LABEL: Address32:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1116,20 +1116,20 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: Address32:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1173,12 +1173,12 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: Address32:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -1186,8 +1186,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -1345,20 +1345,20 @@ entry:
define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
; GFX8-LABEL: Offset64:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1397,20 +1397,20 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: Offset64:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1446,12 +1446,12 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: Offset64:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -1459,8 +1459,8 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -1569,20 +1569,20 @@ entry:
define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
; GFX8-LABEL: p32Offset64:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1619,20 +1619,20 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: p32Offset64:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1664,12 +1664,12 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: p32Offset64:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -1677,8 +1677,8 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -1776,31 +1776,31 @@ entry:
define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX8-LABEL: DiffBase:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s66, -1
-; GFX8-NEXT: s_mov_b32 s67, 0xe80000
-; GFX8-NEXT: s_add_u32 s64, s64, s11
-; GFX8-NEXT: s_addc_u32 s65, s65, 0
+; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s50, -1
+; GFX8-NEXT: s_mov_b32 s51, 0xe80000
+; GFX8-NEXT: s_add_u32 s48, s48, s11
+; GFX8-NEXT: s_addc_u32 s49, s49, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
-; GFX8-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24
+; GFX8-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[64:65]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX8-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
-; GFX8-NEXT: v_mov_b32_e32 v1, s49
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, s48, v2
+; GFX8-NEXT: v_mov_b32_e32 v1, s37
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, s36, v2
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX8-NEXT: v_mov_b32_e32 v3, s51
-; GFX8-NEXT: v_add_u32_e32 v12, vcc, s50, v2
+; GFX8-NEXT: v_mov_b32_e32 v3, s39
+; GFX8-NEXT: v_add_u32_e32 v12, vcc, s38, v2
; GFX8-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v0
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
@@ -1839,31 +1839,31 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
;
; GFX9-LABEL: DiffBase:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s66, -1
-; GFX9-NEXT: s_mov_b32 s67, 0xe00000
-; GFX9-NEXT: s_add_u32 s64, s64, s11
-; GFX9-NEXT: s_addc_u32 s65, s65, 0
+; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s50, -1
+; GFX9-NEXT: s_mov_b32 s51, 0xe00000
+; GFX9-NEXT: s_add_u32 s48, s48, s11
+; GFX9-NEXT: s_addc_u32 s49, s49, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
-; GFX9-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX9-NEXT: v_and_b32_e32 v16, 0xffff8000, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s49
-; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s48, v16
+; GFX9-NEXT: v_mov_b32_e32 v0, s37
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s36, v16
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc
-; GFX9-NEXT: v_mov_b32_e32 v0, s51
-; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s50, v16
+; GFX9-NEXT: v_mov_b32_e32 v0, s39
+; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s38, v16
; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v0, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
@@ -1893,35 +1893,35 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v15, v3, vcc
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49]
+; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: DiffBase:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s66, -1
-; GFX10-NEXT: s_mov_b32 s67, 0x31c16000
-; GFX10-NEXT: s_add_u32 s64, s64, s11
-; GFX10-NEXT: s_addc_u32 s65, s65, 0
+; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s50, -1
+; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
+; GFX10-NEXT: s_add_u32 s48, s48, s11
+; GFX10-NEXT: s_addc_u32 s49, s49, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX10-NEXT: v_mov_b32_e32 v31, v0
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
-; GFX10-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24
+; GFX10-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[64:65]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[66:67]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX10-NEXT: v_and_b32_e32 v16, 0xffff8000, v0
-; GFX10-NEXT: v_add_co_u32 v8, s0, s48, v16
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s49, 0, s0
-; GFX10-NEXT: v_add_co_u32 v12, s0, s50, v16
-; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s51, 0, s0
+; GFX10-NEXT: v_add_co_u32 v8, s0, s36, v16
+; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s37, 0, s0
+; GFX10-NEXT: v_add_co_u32 v12, s0, s38, v16
+; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s39, 0, s0
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v8, 0x1800
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v12, 0x3000
@@ -1952,7 +1952,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v15, v3, vcc_lo
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49]
+; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: DiffBase:
@@ -1962,21 +1962,21 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX11-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX11-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 0
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT: s_load_b128 s[48:51], s[4:5], 0x24
+; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_and_b32_e32 v12, 0xffff8000, v0
-; GFX11-NEXT: v_add_co_u32 v2, s0, s48, v12
+; GFX11-NEXT: v_add_co_u32 v2, s0, s36, v12
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s49, 0, s0
-; GFX11-NEXT: v_add_co_u32 v8, s0, s50, v12
+; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s37, 0, s0
+; GFX11-NEXT: v_add_co_u32 v8, s0, s38, v12
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
-; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s51, 0, s0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s39, 0, s0
; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0x2000
; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, 0x2000, v8
@@ -2005,7 +2005,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: global_store_b64 v12, v[0:1], s[48:49]
+; GFX11-NEXT: global_store_b64 v12, v[0:1], s[36:37]
; GFX11-NEXT: s_endpgm
ptr addrspace(1) %buffer2) {
entry:
@@ -2046,20 +2046,20 @@ entry:
define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX8-LABEL: ReverseOrder:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2127,20 +2127,20 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
;
; GFX9-LABEL: ReverseOrder:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -2196,12 +2196,12 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
;
; GFX10-LABEL: ReverseOrder:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -2209,8 +2209,8 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
@@ -2382,20 +2382,20 @@ entry:
define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buffer) {
; GFX8-LABEL: negativeoffset:
; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX8-NEXT: s_mov_b32 s50, -1
-; GFX8-NEXT: s_mov_b32 s51, 0xe80000
-; GFX8-NEXT: s_add_u32 s48, s48, s11
-; GFX8-NEXT: s_addc_u32 s49, s49, 0
+; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX8-NEXT: s_mov_b32 s38, -1
+; GFX8-NEXT: s_mov_b32 s39, 0xe80000
+; GFX8-NEXT: s_add_u32 s36, s36, s11
+; GFX8-NEXT: s_addc_u32 s37, s37, 0
; GFX8-NEXT: s_getpc_b64 s[0:1]
; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX8-NEXT: v_mov_b32_e32 v31, v0
-; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2423,20 +2423,20 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
;
; GFX9-LABEL: negativeoffset:
; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX9-NEXT: s_mov_b32 s50, -1
-; GFX9-NEXT: s_mov_b32 s51, 0xe00000
-; GFX9-NEXT: s_add_u32 s48, s48, s11
-; GFX9-NEXT: s_addc_u32 s49, s49, 0
+; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX9-NEXT: s_mov_b32 s38, -1
+; GFX9-NEXT: s_mov_b32 s39, 0xe00000
+; GFX9-NEXT: s_add_u32 s36, s36, s11
+; GFX9-NEXT: s_addc_u32 s37, s37, 0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v31, v0
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -2463,12 +2463,12 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
;
; GFX10-LABEL: negativeoffset:
; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GFX10-NEXT: s_mov_b32 s50, -1
-; GFX10-NEXT: s_mov_b32 s51, 0x31c16000
-; GFX10-NEXT: s_add_u32 s48, s48, s11
-; GFX10-NEXT: s_addc_u32 s49, s49, 0
+; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GFX10-NEXT: s_mov_b32 s38, -1
+; GFX10-NEXT: s_mov_b32 s39, 0x31c16000
+; GFX10-NEXT: s_add_u32 s36, s36, s11
+; GFX10-NEXT: s_addc_u32 s37, s37, 0
; GFX10-NEXT: s_getpc_b64 s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12
@@ -2476,8 +2476,8 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf
; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
-; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
+; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index 814674804df57..4a0bb6ceccd3f 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -42,16 +42,16 @@ body: |
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0
; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
- ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec
- ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240
; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5)
; CHECK-NEXT: S_BRANCH %bb.1
@@ -60,7 +60,7 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000)
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr34_sgpr35, implicit-def dead $scc
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]]
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.17
@@ -70,10 +70,6 @@ body: |
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr72
- ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr72
- ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr72
- ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr72
; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr72
; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr72
; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr72
@@ -81,36 +77,40 @@ body: |
; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr72
; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr72
; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr72
- ; CHECK-NEXT: renamable $sgpr47 = COPY killed renamable $sgpr72
- ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
- ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr80 = COPY killed renamable $sgpr52
- ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79
- ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr80
- ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr49 = COPY killed renamable $sgpr68
- ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr50 = COPY killed renamable $sgpr68
- ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr68
- ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr52 = COPY killed renamable $sgpr68
- ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr76
- ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr72
- ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72
; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr76
- ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr76
- ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr56
+ ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57 = COPY renamable $sgpr52_sgpr53
+ ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr56_sgpr57
+ ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr76
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51
+ ; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50 = COPY renamable $sgpr52_sgpr53_sgpr54
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54 = COPY renamable $sgpr48_sgpr49_sgpr50
+ ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47
+ ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr84
; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr61 = COPY killed renamable $sgpr80
; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
@@ -125,6 +125,17 @@ body: |
; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr68 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
+ ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
+ ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr68
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr65 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.5
@@ -157,21 +168,20 @@ body: |
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5)
- ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr62_sgpr63, implicit-def dead $scc
- ; CHECK-NEXT: renamable $sgpr62_sgpr63 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr54_sgpr55, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr54_sgpr55 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.7(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
- ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr12_sgpr13, implicit $exec
+ ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.8(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr64_sgpr65 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
; CHECK-NEXT: renamable $sgpr66_sgpr67 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec
@@ -179,14 +189,14 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr64_sgpr65, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr84_sgpr85, implicit $exec
@@ -197,8 +207,8 @@ body: |
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr4_sgpr5
; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr68_sgpr69
- ; CHECK-NEXT: renamable $sgpr78_sgpr79 = COPY killed renamable $sgpr6_sgpr7
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr78_sgpr79
+ ; CHECK-NEXT: renamable $sgpr70_sgpr71 = COPY killed renamable $sgpr6_sgpr7
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr70_sgpr71
; CHECK-NEXT: renamable $sgpr80_sgpr81 = COPY killed renamable $sgpr10_sgpr11
; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr80_sgpr81
; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr14
@@ -207,18 +217,18 @@ body: |
; CHECK-NEXT: renamable $sgpr33 = COPY killed renamable $sgpr16
; CHECK-NEXT: renamable $sgpr83 = COPY killed renamable $sgpr15
; CHECK-NEXT: renamable $sgpr85 = COPY killed renamable $sgpr14
- ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY killed renamable $sgpr18_sgpr19
+ ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr18_sgpr19
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
- ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr36_sgpr37
+ ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr48_sgpr49
; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85
; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr83
; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr33
; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr68_sgpr69
- ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr78_sgpr79
+ ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr70_sgpr71
; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr84
; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr80_sgpr81
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
@@ -228,7 +238,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.10:
; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.12
@@ -242,16 +252,16 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.12:
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101
+ ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr62_sgpr63
+ ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr54_sgpr55
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.13:
; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000)
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.14
@@ -264,7 +274,8 @@ body: |
; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000)
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.16:
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
index 926ebba0ced21..ac9bb27b1c1c8 100644
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -1905,12 +1905,12 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32>
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16
; VI-NEXT: v_cmp_eq_u32_e64 s[18:19], 0, v17
-; VI-NEXT: v_cmp_eq_u32_e64 s[38:39], 0, v29
+; VI-NEXT: v_cmp_eq_u32_e64 s[40:41], 0, v29
; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v6
; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v14
; VI-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v18
; VI-NEXT: v_cmp_eq_u32_e64 s[28:29], 0, v27
-; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[38:39]
+; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[40:41]
; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v5
; VI-NEXT: v_lshrrev_b32_e32 v18, 16, v13
; VI-NEXT: v_cmp_eq_u32_e64 s[20:21], 0, v19
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
index 634d077e41d37..47810346c50b7 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
@@ -9,15 +9,15 @@ declare void @foo()
define amdgpu_kernel void @kernel() {
; GCN-LABEL: kernel:
; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; GCN-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; GCN-NEXT: s_mov_b32 s50, -1
+; GCN-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s38, -1
; GCN-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane
-; GCN-NEXT: s_mov_b32 s51, 0xe00000
+; GCN-NEXT: s_mov_b32 s39, 0xe00000
; GCN-NEXT: v_writelane_b32 v40, s4, 0
-; GCN-NEXT: s_add_u32 s48, s48, s11
+; GCN-NEXT: s_add_u32 s36, s36, s11
; GCN-NEXT: v_writelane_b32 v40, s5, 1
-; GCN-NEXT: s_addc_u32 s49, s49, 0
+; GCN-NEXT: s_addc_u32 s37, s37, 0
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
; GCN-NEXT: v_readlane_b32 s0, v40, 0
; GCN-NEXT: s_mov_b32 s13, s9
@@ -34,9 +34,9 @@ define amdgpu_kernel void @kernel() {
; GCN-NEXT: s_mov_b64 s[6:7], s[2:3]
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT: s_mov_b64 s[0:1], s[48:49]
+; GCN-NEXT: s_mov_b64 s[0:1], s[36:37]
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
-; GCN-NEXT: s_mov_b64 s[2:3], s[50:51]
+; GCN-NEXT: s_mov_b64 s[2:3], s[38:39]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
index e2d2a65c41b4f..66584546e1043 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
@@ -15006,8 +15006,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -15029,8 +15029,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -15047,8 +15047,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -15070,8 +15070,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -15124,8 +15124,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -15147,8 +15147,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -15165,8 +15165,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -15188,8 +15188,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16056,8 +16056,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -16077,8 +16077,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16095,8 +16095,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -16116,8 +16116,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16785,8 +16785,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -16806,8 +16806,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -16824,8 +16824,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -16845,8 +16845,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -20853,8 +20853,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -20876,8 +20876,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -20894,8 +20894,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -20917,8 +20917,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21027,8 +21027,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -21050,8 +21050,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21068,8 +21068,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -21091,8 +21091,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21145,8 +21145,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -21168,8 +21168,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21186,8 +21186,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -21209,8 +21209,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21251,8 +21251,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -21274,8 +21274,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21292,8 +21292,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -21315,8 +21315,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21369,8 +21369,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -21392,8 +21392,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21410,8 +21410,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -21433,8 +21433,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21913,8 +21913,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -21936,8 +21936,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -21954,8 +21954,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -21977,8 +21977,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22087,8 +22087,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -22110,8 +22110,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22128,8 +22128,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -22151,8 +22151,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22205,8 +22205,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -22228,8 +22228,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22246,8 +22246,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -22269,8 +22269,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22311,8 +22311,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -22334,8 +22334,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22352,8 +22352,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -22375,8 +22375,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22429,8 +22429,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -22452,8 +22452,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -22470,8 +22470,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -22493,8 +22493,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -23411,8 +23411,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -23432,8 +23432,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -23450,8 +23450,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -23471,8 +23471,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -23684,8 +23684,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -23707,8 +23707,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -23725,8 +23725,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -23748,8 +23748,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -24528,8 +24528,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -24551,8 +24551,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -24569,8 +24569,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -24592,8 +24592,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -24646,8 +24646,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -24669,8 +24669,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -24687,8 +24687,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -24710,8 +24710,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25159,8 +25159,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -25180,8 +25180,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25198,8 +25198,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -25219,8 +25219,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25544,8 +25544,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -25567,8 +25567,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -25585,8 +25585,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -25608,8 +25608,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -26388,8 +26388,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -26411,8 +26411,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -26429,8 +26429,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -26452,8 +26452,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -26506,8 +26506,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -26529,8 +26529,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -26547,8 +26547,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -26570,8 +26570,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27283,8 +27283,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -27306,8 +27306,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27324,8 +27324,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -27347,8 +27347,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27401,8 +27401,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -27424,8 +27424,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -27442,8 +27442,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -27465,8 +27465,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28241,8 +28241,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -28264,8 +28264,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28282,8 +28282,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -28305,8 +28305,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28359,8 +28359,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -28382,8 +28382,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -28400,8 +28400,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -28423,8 +28423,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29223,8 +29223,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -29246,8 +29246,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29264,8 +29264,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -29287,8 +29287,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29341,8 +29341,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: v_writelane_b32 v0, s36, 0
; GFX900-NEXT: v_writelane_b32 v0, s37, 1
-; GFX900-NEXT: v_writelane_b32 v0, s46, 2
-; GFX900-NEXT: v_writelane_b32 v0, s47, 3
+; GFX900-NEXT: v_writelane_b32 v0, s38, 2
+; GFX900-NEXT: v_writelane_b32 v0, s39, 3
; GFX900-NEXT: v_writelane_b32 v0, s48, 4
; GFX900-NEXT: v_writelane_b32 v0, s49, 5
; GFX900-NEXT: v_writelane_b32 v0, s50, 6
@@ -29364,8 +29364,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX900-NEXT: v_readlane_b32 s50, v0, 6
; GFX900-NEXT: v_readlane_b32 s49, v0, 5
; GFX900-NEXT: v_readlane_b32 s48, v0, 4
-; GFX900-NEXT: v_readlane_b32 s47, v0, 3
-; GFX900-NEXT: v_readlane_b32 s46, v0, 2
+; GFX900-NEXT: v_readlane_b32 s39, v0, 3
+; GFX900-NEXT: v_readlane_b32 s38, v0, 2
; GFX900-NEXT: v_readlane_b32 s37, v0, 1
; GFX900-NEXT: v_readlane_b32 s36, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
@@ -29382,8 +29382,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s46, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s47, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
@@ -29405,8 +29405,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s47, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s46, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 3447cd161c653..0221bb0cf4f35 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -610,16 +610,16 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; FIJI-NEXT: v_writelane_b32 v40, s35, 3
; FIJI-NEXT: v_writelane_b32 v40, s36, 4
; FIJI-NEXT: v_writelane_b32 v40, s37, 5
-; FIJI-NEXT: v_writelane_b32 v40, s46, 6
-; FIJI-NEXT: v_writelane_b32 v40, s47, 7
+; FIJI-NEXT: v_writelane_b32 v40, s38, 6
+; FIJI-NEXT: v_writelane_b32 v40, s39, 7
; FIJI-NEXT: v_writelane_b32 v40, s48, 8
; FIJI-NEXT: v_writelane_b32 v40, s49, 9
; FIJI-NEXT: v_writelane_b32 v40, s50, 10
; FIJI-NEXT: v_writelane_b32 v40, s51, 11
; FIJI-NEXT: v_writelane_b32 v40, s52, 12
; FIJI-NEXT: v_writelane_b32 v40, s53, 13
-; FIJI-NEXT: v_writelane_b32 v40, s62, 14
-; FIJI-NEXT: v_writelane_b32 v40, s63, 15
+; FIJI-NEXT: v_writelane_b32 v40, s54, 14
+; FIJI-NEXT: v_writelane_b32 v40, s55, 15
; FIJI-NEXT: v_writelane_b32 v40, s64, 16
; FIJI-NEXT: s_mov_b32 s50, s15
; FIJI-NEXT: s_mov_b32 s51, s14
@@ -627,10 +627,10 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; FIJI-NEXT: s_mov_b32 s53, s12
; FIJI-NEXT: s_mov_b64 s[34:35], s[10:11]
; FIJI-NEXT: s_mov_b64 s[36:37], s[8:9]
-; FIJI-NEXT: s_mov_b64 s[46:47], s[6:7]
+; FIJI-NEXT: s_mov_b64 s[38:39], s[6:7]
; FIJI-NEXT: s_mov_b64 s[48:49], s[4:5]
; FIJI-NEXT: v_add_u32_e32 v3, vcc, v3, v4
-; FIJI-NEXT: s_mov_b64 s[62:63], exec
+; FIJI-NEXT: s_mov_b64 s[54:55], exec
; FIJI-NEXT: s_addk_i32 s32, 0x400
; FIJI-NEXT: v_writelane_b32 v40, s65, 17
; FIJI-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1
@@ -639,7 +639,7 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; FIJI-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; FIJI-NEXT: s_and_saveexec_b64 s[64:65], vcc
; FIJI-NEXT: s_mov_b64 s[4:5], s[48:49]
-; FIJI-NEXT: s_mov_b64 s[6:7], s[46:47]
+; FIJI-NEXT: s_mov_b64 s[6:7], s[38:39]
; FIJI-NEXT: s_mov_b64 s[8:9], s[36:37]
; FIJI-NEXT: s_mov_b64 s[10:11], s[34:35]
; FIJI-NEXT: s_mov_b32 s12, s53
@@ -657,20 +657,20 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; FIJI-NEXT: s_xor_b64 exec, exec, s[64:65]
; FIJI-NEXT: s_cbranch_execnz .LBB18_1
; FIJI-NEXT: ; %bb.2:
-; FIJI-NEXT: s_mov_b64 exec, s[62:63]
+; FIJI-NEXT: s_mov_b64 exec, s[54:55]
; FIJI-NEXT: v_mov_b32_e32 v0, v4
; FIJI-NEXT: v_readlane_b32 s65, v40, 17
; FIJI-NEXT: v_readlane_b32 s64, v40, 16
-; FIJI-NEXT: v_readlane_b32 s63, v40, 15
-; FIJI-NEXT: v_readlane_b32 s62, v40, 14
+; FIJI-NEXT: v_readlane_b32 s55, v40, 15
+; FIJI-NEXT: v_readlane_b32 s54, v40, 14
; FIJI-NEXT: v_readlane_b32 s53, v40, 13
; FIJI-NEXT: v_readlane_b32 s52, v40, 12
; FIJI-NEXT: v_readlane_b32 s51, v40, 11
; FIJI-NEXT: v_readlane_b32 s50, v40, 10
; FIJI-NEXT: v_readlane_b32 s49, v40, 9
; FIJI-NEXT: v_readlane_b32 s48, v40, 8
-; FIJI-NEXT: v_readlane_b32 s47, v40, 7
-; FIJI-NEXT: v_readlane_b32 s46, v40, 6
+; FIJI-NEXT: v_readlane_b32 s39, v40, 7
+; FIJI-NEXT: v_readlane_b32 s38, v40, 6
; FIJI-NEXT: v_readlane_b32 s37, v40, 5
; FIJI-NEXT: v_readlane_b32 s36, v40, 4
; FIJI-NEXT: v_readlane_b32 s35, v40, 3
@@ -701,16 +701,16 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; HAWAII-NEXT: v_writelane_b32 v40, s35, 3
; HAWAII-NEXT: v_writelane_b32 v40, s36, 4
; HAWAII-NEXT: v_writelane_b32 v40, s37, 5
-; HAWAII-NEXT: v_writelane_b32 v40, s46, 6
-; HAWAII-NEXT: v_writelane_b32 v40, s47, 7
+; HAWAII-NEXT: v_writelane_b32 v40, s38, 6
+; HAWAII-NEXT: v_writelane_b32 v40, s39, 7
; HAWAII-NEXT: v_writelane_b32 v40, s48, 8
; HAWAII-NEXT: v_writelane_b32 v40, s49, 9
; HAWAII-NEXT: v_writelane_b32 v40, s50, 10
; HAWAII-NEXT: v_writelane_b32 v40, s51, 11
; HAWAII-NEXT: v_writelane_b32 v40, s52, 12
; HAWAII-NEXT: v_writelane_b32 v40, s53, 13
-; HAWAII-NEXT: v_writelane_b32 v40, s62, 14
-; HAWAII-NEXT: v_writelane_b32 v40, s63, 15
+; HAWAII-NEXT: v_writelane_b32 v40, s54, 14
+; HAWAII-NEXT: v_writelane_b32 v40, s55, 15
; HAWAII-NEXT: v_writelane_b32 v40, s64, 16
; HAWAII-NEXT: s_mov_b32 s50, s15
; HAWAII-NEXT: s_mov_b32 s51, s14
@@ -718,10 +718,10 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; HAWAII-NEXT: s_mov_b32 s53, s12
; HAWAII-NEXT: s_mov_b64 s[34:35], s[10:11]
; HAWAII-NEXT: s_mov_b64 s[36:37], s[8:9]
-; HAWAII-NEXT: s_mov_b64 s[46:47], s[6:7]
+; HAWAII-NEXT: s_mov_b64 s[38:39], s[6:7]
; HAWAII-NEXT: s_mov_b64 s[48:49], s[4:5]
; HAWAII-NEXT: v_add_i32_e32 v3, vcc, v3, v4
-; HAWAII-NEXT: s_mov_b64 s[62:63], exec
+; HAWAII-NEXT: s_mov_b64 s[54:55], exec
; HAWAII-NEXT: s_addk_i32 s32, 0x400
; HAWAII-NEXT: v_writelane_b32 v40, s65, 17
; HAWAII-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1
@@ -730,7 +730,7 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; HAWAII-NEXT: s_and_saveexec_b64 s[64:65], vcc
; HAWAII-NEXT: s_mov_b64 s[4:5], s[48:49]
-; HAWAII-NEXT: s_mov_b64 s[6:7], s[46:47]
+; HAWAII-NEXT: s_mov_b64 s[6:7], s[38:39]
; HAWAII-NEXT: s_mov_b64 s[8:9], s[36:37]
; HAWAII-NEXT: s_mov_b64 s[10:11], s[34:35]
; HAWAII-NEXT: s_mov_b32 s12, s53
@@ -748,20 +748,20 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; HAWAII-NEXT: s_xor_b64 exec, exec, s[64:65]
; HAWAII-NEXT: s_cbranch_execnz .LBB18_1
; HAWAII-NEXT: ; %bb.2:
-; HAWAII-NEXT: s_mov_b64 exec, s[62:63]
+; HAWAII-NEXT: s_mov_b64 exec, s[54:55]
; HAWAII-NEXT: v_mov_b32_e32 v0, v4
; HAWAII-NEXT: v_readlane_b32 s65, v40, 17
; HAWAII-NEXT: v_readlane_b32 s64, v40, 16
-; HAWAII-NEXT: v_readlane_b32 s63, v40, 15
-; HAWAII-NEXT: v_readlane_b32 s62, v40, 14
+; HAWAII-NEXT: v_readlane_b32 s55, v40, 15
+; HAWAII-NEXT: v_readlane_b32 s54, v40, 14
; HAWAII-NEXT: v_readlane_b32 s53, v40, 13
; HAWAII-NEXT: v_readlane_b32 s52, v40, 12
; HAWAII-NEXT: v_readlane_b32 s51, v40, 11
; HAWAII-NEXT: v_readlane_b32 s50, v40, 10
; HAWAII-NEXT: v_readlane_b32 s49, v40, 9
; HAWAII-NEXT: v_readlane_b32 s48, v40, 8
-; HAWAII-NEXT: v_readlane_b32 s47, v40, 7
-; HAWAII-NEXT: v_readlane_b32 s46, v40, 6
+; HAWAII-NEXT: v_readlane_b32 s39, v40, 7
+; HAWAII-NEXT: v_readlane_b32 s38, v40, 6
; HAWAII-NEXT: v_readlane_b32 s37, v40, 5
; HAWAII-NEXT: v_readlane_b32 s36, v40, 4
; HAWAII-NEXT: v_readlane_b32 s35, v40, 3
@@ -792,16 +792,16 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; GFX9-NEXT: v_writelane_b32 v40, s35, 3
; GFX9-NEXT: v_writelane_b32 v40, s36, 4
; GFX9-NEXT: v_writelane_b32 v40, s37, 5
-; GFX9-NEXT: v_writelane_b32 v40, s46, 6
-; GFX9-NEXT: v_writelane_b32 v40, s47, 7
+; GFX9-NEXT: v_writelane_b32 v40, s38, 6
+; GFX9-NEXT: v_writelane_b32 v40, s39, 7
; GFX9-NEXT: v_writelane_b32 v40, s48, 8
; GFX9-NEXT: v_writelane_b32 v40, s49, 9
; GFX9-NEXT: v_writelane_b32 v40, s50, 10
; GFX9-NEXT: v_writelane_b32 v40, s51, 11
; GFX9-NEXT: v_writelane_b32 v40, s52, 12
; GFX9-NEXT: v_writelane_b32 v40, s53, 13
-; GFX9-NEXT: v_writelane_b32 v40, s62, 14
-; GFX9-NEXT: v_writelane_b32 v40, s63, 15
+; GFX9-NEXT: v_writelane_b32 v40, s54, 14
+; GFX9-NEXT: v_writelane_b32 v40, s55, 15
; GFX9-NEXT: v_writelane_b32 v40, s64, 16
; GFX9-NEXT: s_mov_b32 s50, s15
; GFX9-NEXT: s_mov_b32 s51, s14
@@ -809,10 +809,10 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; GFX9-NEXT: s_mov_b32 s53, s12
; GFX9-NEXT: s_mov_b64 s[34:35], s[10:11]
; GFX9-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GFX9-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GFX9-NEXT: s_mov_b64 s[38:39], s[6:7]
; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5]
; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
-; GFX9-NEXT: s_mov_b64 s[62:63], exec
+; GFX9-NEXT: s_mov_b64 s[54:55], exec
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s65, 17
; GFX9-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1
@@ -821,7 +821,7 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
; GFX9-NEXT: s_and_saveexec_b64 s[64:65], vcc
; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37]
; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
; GFX9-NEXT: s_mov_b32 s12, s53
@@ -839,20 +839,20 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr
; GFX9-NEXT: s_xor_b64 exec, exec, s[64:65]
; GFX9-NEXT: s_cbranch_execnz .LBB18_1
; GFX9-NEXT: ; %bb.2:
-; GFX9-NEXT: s_mov_b64 exec, s[62:63]
+; GFX9-NEXT: s_mov_b64 exec, s[54:55]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_readlane_b32 s65, v40, 17
; GFX9-NEXT: v_readlane_b32 s64, v40, 16
-; GFX9-NEXT: v_readlane_b32 s63, v40, 15
-; GFX9-NEXT: v_readlane_b32 s62, v40, 14
+; GFX9-NEXT: v_readlane_b32 s55, v40, 15
+; GFX9-NEXT: v_readlane_b32 s54, v40, 14
; GFX9-NEXT: v_readlane_b32 s53, v40, 13
; GFX9-NEXT: v_readlane_b32 s52, v40, 12
; GFX9-NEXT: v_readlane_b32 s51, v40, 11
; GFX9-NEXT: v_readlane_b32 s50, v40, 10
; GFX9-NEXT: v_readlane_b32 s49, v40, 9
; GFX9-NEXT: v_readlane_b32 s48, v40, 8
-; GFX9-NEXT: v_readlane_b32 s47, v40, 7
-; GFX9-NEXT: v_readlane_b32 s46, v40, 6
+; GFX9-NEXT: v_readlane_b32 s39, v40, 7
+; GFX9-NEXT: v_readlane_b32 s38, v40, 6
; GFX9-NEXT: v_readlane_b32 s37, v40, 5
; GFX9-NEXT: v_readlane_b32 s36, v40, 4
; GFX9-NEXT: v_readlane_b32 s35, v40, 3
diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
index 6b5c624356f47..cf23a9d1e8a57 100644
--- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
@@ -34,78 +34,56 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $sgpr49 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY undef $sgpr8_sgpr9
+ ; CHECK-NEXT: renamable $sgpr35 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9
; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF
- ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
- ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr46_sgpr47, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
- ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4)
+ ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
+ ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr48_sgpr49, 0, 0 :: (invariant load (s64), align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY renamable $sgpr14_sgpr15
- ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr13
- ; CHECK-NEXT: renamable $vgpr23 = IMPLICIT_DEF
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr23, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr23
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, killed $vgpr23
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, killed $vgpr23
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, killed $vgpr23
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, killed $vgpr23
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, killed $vgpr23
- ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, killed $vgpr23, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
- ; CHECK-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr23, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; CHECK-NEXT: $vgpr1 = COPY killed renamable $sgpr15
+ ; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr55
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; CHECK-NEXT: $vcc = COPY renamable $sgpr48_sgpr49
+ ; CHECK-NEXT: $vcc = COPY renamable $sgpr34_sgpr35
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47
+ ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47
+ ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4)
; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0
+ ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0
+ ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0
+ ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr46_sgpr47, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+ ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; CHECK-NEXT: renamable $vgpr23 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
- ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 1
- ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 2
- ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 3
- ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 4
- ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 5
- ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 6
- ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR killed $vgpr23, 7
- ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr10_sgpr11, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr54_sgpr55, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr53
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35
+ ; CHECK-NEXT: renamable $sgpr10_sgpr11 = IMPLICIT_DEF
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: S_ENDPGM 0
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index adaef348a0388..7f4f9489ea4b7 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -53,7 +53,7 @@ body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr10, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-LABEL: name: sgpr_spill_lane_crossover
- ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+ ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63
@@ -61,16 +61,16 @@ body: |
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 6, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 7, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 8, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 9, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 10, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 11, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 12, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 13, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 14, $vgpr63
- ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 15, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 14, $vgpr63
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 15, $vgpr63
; GCN-NEXT: S_NOP 0
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
index fcd835c7f09da..6e8a5126ca823 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
@@ -60,11 +60,11 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0
- ; GCN-NEXT: $sgpr38_sgpr39 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $sgpr40_sgpr41 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $sgpr28_sgpr29 = IMPLICIT_DEF
; GCN-NEXT: $vgpr1 = COPY $vgpr0
; GCN-NEXT: S_NOP 0, implicit $sgpr28_sgpr29
- ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr38_sgpr39
+ ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr40_sgpr41
; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8_sgpr9_sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr15, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
index f7ea8109beea4..fba85455ef693 100644
--- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll
@@ -11,32 +11,32 @@ define void @spill_more_than_wavesize_csr_sgprs() {
; CHECK-NEXT: v_writelane_b32 v0, s35, 0
; CHECK-NEXT: v_writelane_b32 v0, s36, 1
; CHECK-NEXT: v_writelane_b32 v0, s37, 2
-; CHECK-NEXT: v_writelane_b32 v0, s46, 3
-; CHECK-NEXT: v_writelane_b32 v0, s47, 4
+; CHECK-NEXT: v_writelane_b32 v0, s38, 3
+; CHECK-NEXT: v_writelane_b32 v0, s39, 4
; CHECK-NEXT: v_writelane_b32 v0, s48, 5
; CHECK-NEXT: v_writelane_b32 v0, s49, 6
; CHECK-NEXT: v_writelane_b32 v0, s50, 7
; CHECK-NEXT: v_writelane_b32 v0, s51, 8
; CHECK-NEXT: v_writelane_b32 v0, s52, 9
; CHECK-NEXT: v_writelane_b32 v0, s53, 10
-; CHECK-NEXT: v_writelane_b32 v0, s62, 11
-; CHECK-NEXT: v_writelane_b32 v0, s63, 12
+; CHECK-NEXT: v_writelane_b32 v0, s54, 11
+; CHECK-NEXT: v_writelane_b32 v0, s55, 12
; CHECK-NEXT: v_writelane_b32 v0, s64, 13
; CHECK-NEXT: v_writelane_b32 v0, s65, 14
; CHECK-NEXT: v_writelane_b32 v0, s66, 15
; CHECK-NEXT: v_writelane_b32 v0, s67, 16
; CHECK-NEXT: v_writelane_b32 v0, s68, 17
; CHECK-NEXT: v_writelane_b32 v0, s69, 18
-; CHECK-NEXT: v_writelane_b32 v0, s78, 19
-; CHECK-NEXT: v_writelane_b32 v0, s79, 20
+; CHECK-NEXT: v_writelane_b32 v0, s70, 19
+; CHECK-NEXT: v_writelane_b32 v0, s71, 20
; CHECK-NEXT: v_writelane_b32 v0, s80, 21
; CHECK-NEXT: v_writelane_b32 v0, s81, 22
; CHECK-NEXT: v_writelane_b32 v0, s82, 23
; CHECK-NEXT: v_writelane_b32 v0, s83, 24
; CHECK-NEXT: v_writelane_b32 v0, s84, 25
; CHECK-NEXT: v_writelane_b32 v0, s85, 26
-; CHECK-NEXT: v_writelane_b32 v0, s94, 27
-; CHECK-NEXT: v_writelane_b32 v0, s95, 28
+; CHECK-NEXT: v_writelane_b32 v0, s86, 27
+; CHECK-NEXT: v_writelane_b32 v0, s87, 28
; CHECK-NEXT: v_writelane_b32 v0, s96, 29
; CHECK-NEXT: v_writelane_b32 v0, s97, 30
; CHECK-NEXT: v_writelane_b32 v0, s98, 31
@@ -53,32 +53,32 @@ define void @spill_more_than_wavesize_csr_sgprs() {
; CHECK-NEXT: v_readlane_b32 s98, v0, 31
; CHECK-NEXT: v_readlane_b32 s97, v0, 30
; CHECK-NEXT: v_readlane_b32 s96, v0, 29
-; CHECK-NEXT: v_readlane_b32 s95, v0, 28
-; CHECK-NEXT: v_readlane_b32 s94, v0, 27
+; CHECK-NEXT: v_readlane_b32 s87, v0, 28
+; CHECK-NEXT: v_readlane_b32 s86, v0, 27
; CHECK-NEXT: v_readlane_b32 s85, v0, 26
; CHECK-NEXT: v_readlane_b32 s84, v0, 25
; CHECK-NEXT: v_readlane_b32 s83, v0, 24
; CHECK-NEXT: v_readlane_b32 s82, v0, 23
; CHECK-NEXT: v_readlane_b32 s81, v0, 22
; CHECK-NEXT: v_readlane_b32 s80, v0, 21
-; CHECK-NEXT: v_readlane_b32 s79, v0, 20
-; CHECK-NEXT: v_readlane_b32 s78, v0, 19
+; CHECK-NEXT: v_readlane_b32 s71, v0, 20
+; CHECK-NEXT: v_readlane_b32 s70, v0, 19
; CHECK-NEXT: v_readlane_b32 s69, v0, 18
; CHECK-NEXT: v_readlane_b32 s68, v0, 17
; CHECK-NEXT: v_readlane_b32 s67, v0, 16
; CHECK-NEXT: v_readlane_b32 s66, v0, 15
; CHECK-NEXT: v_readlane_b32 s65, v0, 14
; CHECK-NEXT: v_readlane_b32 s64, v0, 13
-; CHECK-NEXT: v_readlane_b32 s63, v0, 12
-; CHECK-NEXT: v_readlane_b32 s62, v0, 11
+; CHECK-NEXT: v_readlane_b32 s55, v0, 12
+; CHECK-NEXT: v_readlane_b32 s54, v0, 11
; CHECK-NEXT: v_readlane_b32 s53, v0, 10
; CHECK-NEXT: v_readlane_b32 s52, v0, 9
; CHECK-NEXT: v_readlane_b32 s51, v0, 8
; CHECK-NEXT: v_readlane_b32 s50, v0, 7
; CHECK-NEXT: v_readlane_b32 s49, v0, 6
; CHECK-NEXT: v_readlane_b32 s48, v0, 5
-; CHECK-NEXT: v_readlane_b32 s47, v0, 4
-; CHECK-NEXT: v_readlane_b32 s46, v0, 3
+; CHECK-NEXT: v_readlane_b32 s39, v0, 4
+; CHECK-NEXT: v_readlane_b32 s38, v0, 3
; CHECK-NEXT: v_readlane_b32 s37, v0, 2
; CHECK-NEXT: v_readlane_b32 s36, v0, 1
; CHECK-NEXT: v_readlane_b32 s35, v0, 0
@@ -110,32 +110,32 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() {
; CHECK-NEXT: v_writelane_b32 v1, s35, 0
; CHECK-NEXT: v_writelane_b32 v1, s36, 1
; CHECK-NEXT: v_writelane_b32 v1, s37, 2
-; CHECK-NEXT: v_writelane_b32 v1, s46, 3
-; CHECK-NEXT: v_writelane_b32 v1, s47, 4
+; CHECK-NEXT: v_writelane_b32 v1, s38, 3
+; CHECK-NEXT: v_writelane_b32 v1, s39, 4
; CHECK-NEXT: v_writelane_b32 v1, s48, 5
; CHECK-NEXT: v_writelane_b32 v1, s49, 6
; CHECK-NEXT: v_writelane_b32 v1, s50, 7
; CHECK-NEXT: v_writelane_b32 v1, s51, 8
; CHECK-NEXT: v_writelane_b32 v1, s52, 9
; CHECK-NEXT: v_writelane_b32 v1, s53, 10
-; CHECK-NEXT: v_writelane_b32 v1, s62, 11
-; CHECK-NEXT: v_writelane_b32 v1, s63, 12
+; CHECK-NEXT: v_writelane_b32 v1, s54, 11
+; CHECK-NEXT: v_writelane_b32 v1, s55, 12
; CHECK-NEXT: v_writelane_b32 v1, s64, 13
; CHECK-NEXT: v_writelane_b32 v1, s65, 14
; CHECK-NEXT: v_writelane_b32 v1, s66, 15
; CHECK-NEXT: v_writelane_b32 v1, s67, 16
; CHECK-NEXT: v_writelane_b32 v1, s68, 17
; CHECK-NEXT: v_writelane_b32 v1, s69, 18
-; CHECK-NEXT: v_writelane_b32 v1, s78, 19
-; CHECK-NEXT: v_writelane_b32 v1, s79, 20
+; CHECK-NEXT: v_writelane_b32 v1, s70, 19
+; CHECK-NEXT: v_writelane_b32 v1, s71, 20
; CHECK-NEXT: v_writelane_b32 v1, s80, 21
; CHECK-NEXT: v_writelane_b32 v1, s81, 22
; CHECK-NEXT: v_writelane_b32 v1, s82, 23
; CHECK-NEXT: v_writelane_b32 v1, s83, 24
; CHECK-NEXT: v_writelane_b32 v1, s84, 25
; CHECK-NEXT: v_writelane_b32 v1, s85, 26
-; CHECK-NEXT: v_writelane_b32 v1, s94, 27
-; CHECK-NEXT: v_writelane_b32 v1, s95, 28
+; CHECK-NEXT: v_writelane_b32 v1, s86, 27
+; CHECK-NEXT: v_writelane_b32 v1, s87, 28
; CHECK-NEXT: v_writelane_b32 v1, s96, 29
; CHECK-NEXT: v_writelane_b32 v1, s97, 30
; CHECK-NEXT: v_writelane_b32 v1, s98, 31
@@ -155,32 +155,32 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() {
; CHECK-NEXT: v_readlane_b32 s98, v1, 31
; CHECK-NEXT: v_readlane_b32 s97, v1, 30
; CHECK-NEXT: v_readlane_b32 s96, v1, 29
-; CHECK-NEXT: v_readlane_b32 s95, v1, 28
-; CHECK-NEXT: v_readlane_b32 s94, v1, 27
+; CHECK-NEXT: v_readlane_b32 s87, v1, 28
+; CHECK-NEXT: v_readlane_b32 s86, v1, 27
; CHECK-NEXT: v_readlane_b32 s85, v1, 26
; CHECK-NEXT: v_readlane_b32 s84, v1, 25
; CHECK-NEXT: v_readlane_b32 s83, v1, 24
; CHECK-NEXT: v_readlane_b32 s82, v1, 23
; CHECK-NEXT: v_readlane_b32 s81, v1, 22
; CHECK-NEXT: v_readlane_b32 s80, v1, 21
-; CHECK-NEXT: v_readlane_b32 s79, v1, 20
-; CHECK-NEXT: v_readlane_b32 s78, v1, 19
+; CHECK-NEXT: v_readlane_b32 s71, v1, 20
+; CHECK-NEXT: v_readlane_b32 s70, v1, 19
; CHECK-NEXT: v_readlane_b32 s69, v1, 18
; CHECK-NEXT: v_readlane_b32 s68, v1, 17
; CHECK-NEXT: v_readlane_b32 s67, v1, 16
; CHECK-NEXT: v_readlane_b32 s66, v1, 15
; CHECK-NEXT: v_readlane_b32 s65, v1, 14
; CHECK-NEXT: v_readlane_b32 s64, v1, 13
-; CHECK-NEXT: v_readlane_b32 s63, v1, 12
-; CHECK-NEXT: v_readlane_b32 s62, v1, 11
+; CHECK-NEXT: v_readlane_b32 s55, v1, 12
+; CHECK-NEXT: v_readlane_b32 s54, v1, 11
; CHECK-NEXT: v_readlane_b32 s53, v1, 10
; CHECK-NEXT: v_readlane_b32 s52, v1, 9
; CHECK-NEXT: v_readlane_b32 s51, v1, 8
; CHECK-NEXT: v_readlane_b32 s50, v1, 7
; CHECK-NEXT: v_readlane_b32 s49, v1, 6
; CHECK-NEXT: v_readlane_b32 s48, v1, 5
-; CHECK-NEXT: v_readlane_b32 s47, v1, 4
-; CHECK-NEXT: v_readlane_b32 s46, v1, 3
+; CHECK-NEXT: v_readlane_b32 s39, v1, 4
+; CHECK-NEXT: v_readlane_b32 s38, v1, 3
; CHECK-NEXT: v_readlane_b32 s37, v1, 2
; CHECK-NEXT: v_readlane_b32 s36, v1, 1
; CHECK-NEXT: v_readlane_b32 s35, v1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index 89bb346ee98df..d4d3b37a0ed1e 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -11,12 +11,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-LABEL: kernel_background_evaluate:
; MUBUF: ; %bb.0: ; %entry
; MUBUF-NEXT: s_load_dword s0, s[4:5], 0x24
-; MUBUF-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0
-; MUBUF-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1
-; MUBUF-NEXT: s_mov_b32 s50, -1
-; MUBUF-NEXT: s_mov_b32 s51, 0x31c16000
-; MUBUF-NEXT: s_add_u32 s48, s48, s11
-; MUBUF-NEXT: s_addc_u32 s49, s49, 0
+; MUBUF-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
+; MUBUF-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
+; MUBUF-NEXT: s_mov_b32 s38, -1
+; MUBUF-NEXT: s_mov_b32 s39, 0x31c16000
+; MUBUF-NEXT: s_add_u32 s36, s36, s11
+; MUBUF-NEXT: s_addc_u32 s37, s37, 0
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x2000
; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000
; MUBUF-NEXT: v_mov_b32_e32 v3, 0
@@ -27,8 +27,8 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: v_mov_b32_e32 v0, s0
-; MUBUF-NEXT: s_mov_b64 s[0:1], s[48:49]
-; MUBUF-NEXT: s_mov_b64 s[2:3], s[50:51]
+; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37]
+; MUBUF-NEXT: s_mov_b64 s[2:3], s[38:39]
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; MUBUF-NEXT: s_and_saveexec_b32 s0, vcc_lo
@@ -37,12 +37,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-NEXT: v_mov_b32_e32 v0, 0x4004
; MUBUF-NEXT: s_mov_b32 s0, 0x41c64e6d
; MUBUF-NEXT: s_clause 0x1
-; MUBUF-NEXT: buffer_load_dword v1, v0, s[48:51], 0 offen
-; MUBUF-NEXT: buffer_load_dword v2, v0, s[48:51], 0 offen offset:4
+; MUBUF-NEXT: buffer_load_dword v1, v0, s[36:39], 0 offen
+; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1
; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
-; MUBUF-NEXT: buffer_store_dword v0, v0, s[48:51], 0 offen
+; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen
; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit
; MUBUF-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index 3a078a64aa28e..4ddde7f297172 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function no_free_scratch_sgpr_for_bp_copy --version 5
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Check that we properly realign the stack. While 4-byte access is all
@@ -416,21 +416,21 @@ define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 {
; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s39, s34
+; GCN-NEXT: s_mov_b32 s41, s34
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4
-; GCN-NEXT: s_mov_b32 s38, s33
+; GCN-NEXT: s_mov_b32 s40, s33
; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
; GCN-NEXT: s_addk_i32 s32, 0x6000
; GCN-NEXT: s_mov_b32 s32, s34
-; GCN-NEXT: s_mov_b32 s34, s39
+; GCN-NEXT: s_mov_b32 s34, s41
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:128
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_mov_b32 s33, s38
+; GCN-NEXT: s_mov_b32 s33, s40
; GCN-NEXT: s_setpc_b64 s[30:31]
%local_val = alloca i32, align 128, addrspace(5)
store volatile i32 %b, ptr addrspace(5) %local_val, align 128
@@ -454,78 +454,45 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 {
; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
-; GCN-NEXT: v_mov_b32_e32 v0, s34
+; GCN-NEXT: v_writelane_b32 v39, s4, 32
+; GCN-NEXT: v_writelane_b32 v39, s34, 33
; GCN-NEXT: s_mov_b32 s34, s32
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4
; GCN-NEXT: v_writelane_b32 v39, s39, 0
-; GCN-NEXT: v_writelane_b32 v39, s40, 1
-; GCN-NEXT: v_writelane_b32 v39, s41, 2
-; GCN-NEXT: v_writelane_b32 v39, s42, 3
-; GCN-NEXT: v_writelane_b32 v39, s43, 4
-; GCN-NEXT: v_writelane_b32 v39, s44, 5
-; GCN-NEXT: v_writelane_b32 v39, s45, 6
-; GCN-NEXT: v_writelane_b32 v39, s46, 7
-; GCN-NEXT: v_writelane_b32 v39, s47, 8
-; GCN-NEXT: v_writelane_b32 v39, s48, 9
-; GCN-NEXT: v_writelane_b32 v39, s49, 10
-; GCN-NEXT: v_writelane_b32 v39, s50, 11
-; GCN-NEXT: v_writelane_b32 v39, s51, 12
-; GCN-NEXT: v_writelane_b32 v39, s52, 13
-; GCN-NEXT: v_writelane_b32 v39, s53, 14
-; GCN-NEXT: v_writelane_b32 v39, s54, 15
-; GCN-NEXT: v_writelane_b32 v39, s55, 16
-; GCN-NEXT: v_writelane_b32 v39, s56, 17
-; GCN-NEXT: v_writelane_b32 v39, s57, 18
-; GCN-NEXT: v_writelane_b32 v39, s58, 19
-; GCN-NEXT: v_writelane_b32 v39, s59, 20
-; GCN-NEXT: v_writelane_b32 v39, s60, 21
-; GCN-NEXT: v_writelane_b32 v39, s61, 22
-; GCN-NEXT: v_writelane_b32 v39, s62, 23
-; GCN-NEXT: v_writelane_b32 v39, s63, 24
-; GCN-NEXT: v_writelane_b32 v39, s64, 25
-; GCN-NEXT: v_writelane_b32 v39, s65, 26
-; GCN-NEXT: v_writelane_b32 v39, s66, 27
-; GCN-NEXT: v_writelane_b32 v39, s67, 28
-; GCN-NEXT: v_writelane_b32 v39, s68, 29
-; GCN-NEXT: v_writelane_b32 v39, s69, 30
-; GCN-NEXT: v_writelane_b32 v39, s70, 31
-; GCN-NEXT: v_writelane_b32 v39, s71, 32
-; GCN-NEXT: v_writelane_b32 v39, s72, 33
-; GCN-NEXT: v_writelane_b32 v39, s73, 34
-; GCN-NEXT: v_writelane_b32 v39, s74, 35
-; GCN-NEXT: v_writelane_b32 v39, s75, 36
-; GCN-NEXT: v_writelane_b32 v39, s76, 37
-; GCN-NEXT: v_writelane_b32 v39, s77, 38
-; GCN-NEXT: v_writelane_b32 v39, s78, 39
-; GCN-NEXT: v_writelane_b32 v39, s79, 40
-; GCN-NEXT: v_writelane_b32 v39, s80, 41
-; GCN-NEXT: v_writelane_b32 v39, s81, 42
-; GCN-NEXT: v_writelane_b32 v39, s82, 43
-; GCN-NEXT: v_writelane_b32 v39, s83, 44
-; GCN-NEXT: v_writelane_b32 v39, s84, 45
-; GCN-NEXT: v_writelane_b32 v39, s85, 46
-; GCN-NEXT: v_writelane_b32 v39, s86, 47
-; GCN-NEXT: v_writelane_b32 v39, s87, 48
-; GCN-NEXT: v_writelane_b32 v39, s88, 49
-; GCN-NEXT: v_writelane_b32 v39, s89, 50
-; GCN-NEXT: v_writelane_b32 v39, s90, 51
-; GCN-NEXT: v_writelane_b32 v39, s91, 52
-; GCN-NEXT: v_writelane_b32 v39, s92, 53
-; GCN-NEXT: v_writelane_b32 v39, s93, 54
-; GCN-NEXT: v_writelane_b32 v39, s94, 55
-; GCN-NEXT: v_writelane_b32 v39, s95, 56
-; GCN-NEXT: v_writelane_b32 v39, s96, 57
-; GCN-NEXT: v_writelane_b32 v39, s97, 58
-; GCN-NEXT: v_writelane_b32 v39, s98, 59
-; GCN-NEXT: v_writelane_b32 v39, s99, 60
-; GCN-NEXT: v_writelane_b32 v39, s100, 61
-; GCN-NEXT: v_writelane_b32 v39, s101, 62
-; GCN-NEXT: v_writelane_b32 v39, s102, 63
+; GCN-NEXT: v_writelane_b32 v39, s48, 1
+; GCN-NEXT: v_writelane_b32 v39, s49, 2
+; GCN-NEXT: v_writelane_b32 v39, s50, 3
+; GCN-NEXT: v_writelane_b32 v39, s51, 4
+; GCN-NEXT: v_writelane_b32 v39, s52, 5
+; GCN-NEXT: v_writelane_b32 v39, s53, 6
+; GCN-NEXT: v_writelane_b32 v39, s54, 7
+; GCN-NEXT: v_writelane_b32 v39, s55, 8
+; GCN-NEXT: v_writelane_b32 v39, s64, 9
+; GCN-NEXT: v_writelane_b32 v39, s65, 10
+; GCN-NEXT: v_writelane_b32 v39, s66, 11
+; GCN-NEXT: v_writelane_b32 v39, s67, 12
+; GCN-NEXT: v_writelane_b32 v39, s68, 13
+; GCN-NEXT: v_writelane_b32 v39, s69, 14
+; GCN-NEXT: v_writelane_b32 v39, s70, 15
+; GCN-NEXT: v_writelane_b32 v39, s71, 16
+; GCN-NEXT: v_writelane_b32 v39, s80, 17
+; GCN-NEXT: v_writelane_b32 v39, s81, 18
+; GCN-NEXT: v_writelane_b32 v39, s82, 19
+; GCN-NEXT: v_writelane_b32 v39, s83, 20
+; GCN-NEXT: v_writelane_b32 v39, s84, 21
+; GCN-NEXT: v_writelane_b32 v39, s85, 22
+; GCN-NEXT: v_writelane_b32 v39, s86, 23
+; GCN-NEXT: v_writelane_b32 v39, s87, 24
+; GCN-NEXT: v_writelane_b32 v39, s96, 25
+; GCN-NEXT: v_writelane_b32 v39, s97, 26
+; GCN-NEXT: v_writelane_b32 v39, s98, 27
+; GCN-NEXT: v_writelane_b32 v39, s99, 28
+; GCN-NEXT: v_writelane_b32 v39, s100, 29
+; GCN-NEXT: v_writelane_b32 v39, s101, 30
; GCN-NEXT: s_addk_i32 s32, 0x6000
+; GCN-NEXT: v_writelane_b32 v39, s102, 31
; GCN-NEXT: s_mov_b32 s32, s34
+; GCN-NEXT: v_readlane_b32 s34, v39, 33
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:128
; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -535,76 +502,39 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 {
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; clobber all VGPRs
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
-; GCN-NEXT: v_readlane_b32 s102, v39, 63
-; GCN-NEXT: v_readlane_b32 s101, v39, 62
-; GCN-NEXT: v_readlane_b32 s100, v39, 61
-; GCN-NEXT: v_readlane_b32 s99, v39, 60
-; GCN-NEXT: v_readlane_b32 s98, v39, 59
-; GCN-NEXT: v_readlane_b32 s97, v39, 58
-; GCN-NEXT: v_readlane_b32 s96, v39, 57
-; GCN-NEXT: v_readlane_b32 s95, v39, 56
-; GCN-NEXT: v_readlane_b32 s94, v39, 55
-; GCN-NEXT: v_readlane_b32 s93, v39, 54
-; GCN-NEXT: v_readlane_b32 s92, v39, 53
-; GCN-NEXT: v_readlane_b32 s91, v39, 52
-; GCN-NEXT: v_readlane_b32 s90, v39, 51
-; GCN-NEXT: v_readlane_b32 s89, v39, 50
-; GCN-NEXT: v_readlane_b32 s88, v39, 49
-; GCN-NEXT: v_readlane_b32 s87, v39, 48
-; GCN-NEXT: v_readlane_b32 s86, v39, 47
-; GCN-NEXT: v_readlane_b32 s85, v39, 46
-; GCN-NEXT: v_readlane_b32 s84, v39, 45
-; GCN-NEXT: v_readlane_b32 s83, v39, 44
-; GCN-NEXT: v_readlane_b32 s82, v39, 43
-; GCN-NEXT: v_readlane_b32 s81, v39, 42
-; GCN-NEXT: v_readlane_b32 s80, v39, 41
-; GCN-NEXT: v_readlane_b32 s79, v39, 40
-; GCN-NEXT: v_readlane_b32 s78, v39, 39
-; GCN-NEXT: v_readlane_b32 s77, v39, 38
-; GCN-NEXT: v_readlane_b32 s76, v39, 37
-; GCN-NEXT: v_readlane_b32 s75, v39, 36
-; GCN-NEXT: v_readlane_b32 s74, v39, 35
-; GCN-NEXT: v_readlane_b32 s73, v39, 34
-; GCN-NEXT: v_readlane_b32 s72, v39, 33
-; GCN-NEXT: v_readlane_b32 s71, v39, 32
-; GCN-NEXT: v_readlane_b32 s70, v39, 31
-; GCN-NEXT: v_readlane_b32 s69, v39, 30
-; GCN-NEXT: v_readlane_b32 s68, v39, 29
-; GCN-NEXT: v_readlane_b32 s67, v39, 28
-; GCN-NEXT: v_readlane_b32 s66, v39, 27
-; GCN-NEXT: v_readlane_b32 s65, v39, 26
-; GCN-NEXT: v_readlane_b32 s64, v39, 25
-; GCN-NEXT: v_readlane_b32 s63, v39, 24
-; GCN-NEXT: v_readlane_b32 s62, v39, 23
-; GCN-NEXT: v_readlane_b32 s61, v39, 22
-; GCN-NEXT: v_readlane_b32 s60, v39, 21
-; GCN-NEXT: v_readlane_b32 s59, v39, 20
-; GCN-NEXT: v_readlane_b32 s58, v39, 19
-; GCN-NEXT: v_readlane_b32 s57, v39, 18
-; GCN-NEXT: v_readlane_b32 s56, v39, 17
-; GCN-NEXT: v_readlane_b32 s55, v39, 16
-; GCN-NEXT: v_readlane_b32 s54, v39, 15
-; GCN-NEXT: v_readlane_b32 s53, v39, 14
-; GCN-NEXT: v_readlane_b32 s52, v39, 13
-; GCN-NEXT: v_readlane_b32 s51, v39, 12
-; GCN-NEXT: v_readlane_b32 s50, v39, 11
-; GCN-NEXT: v_readlane_b32 s49, v39, 10
-; GCN-NEXT: v_readlane_b32 s48, v39, 9
-; GCN-NEXT: v_readlane_b32 s47, v39, 8
-; GCN-NEXT: v_readlane_b32 s46, v39, 7
-; GCN-NEXT: v_readlane_b32 s45, v39, 6
-; GCN-NEXT: v_readlane_b32 s44, v39, 5
-; GCN-NEXT: v_readlane_b32 s43, v39, 4
-; GCN-NEXT: v_readlane_b32 s42, v39, 3
-; GCN-NEXT: v_readlane_b32 s41, v39, 2
-; GCN-NEXT: v_readlane_b32 s40, v39, 1
+; GCN-NEXT: v_readlane_b32 s102, v39, 31
+; GCN-NEXT: v_readlane_b32 s101, v39, 30
+; GCN-NEXT: v_readlane_b32 s100, v39, 29
+; GCN-NEXT: v_readlane_b32 s99, v39, 28
+; GCN-NEXT: v_readlane_b32 s98, v39, 27
+; GCN-NEXT: v_readlane_b32 s97, v39, 26
+; GCN-NEXT: v_readlane_b32 s96, v39, 25
+; GCN-NEXT: v_readlane_b32 s87, v39, 24
+; GCN-NEXT: v_readlane_b32 s86, v39, 23
+; GCN-NEXT: v_readlane_b32 s85, v39, 22
+; GCN-NEXT: v_readlane_b32 s84, v39, 21
+; GCN-NEXT: v_readlane_b32 s83, v39, 20
+; GCN-NEXT: v_readlane_b32 s82, v39, 19
+; GCN-NEXT: v_readlane_b32 s81, v39, 18
+; GCN-NEXT: v_readlane_b32 s80, v39, 17
+; GCN-NEXT: v_readlane_b32 s71, v39, 16
+; GCN-NEXT: v_readlane_b32 s70, v39, 15
+; GCN-NEXT: v_readlane_b32 s69, v39, 14
+; GCN-NEXT: v_readlane_b32 s68, v39, 13
+; GCN-NEXT: v_readlane_b32 s67, v39, 12
+; GCN-NEXT: v_readlane_b32 s66, v39, 11
+; GCN-NEXT: v_readlane_b32 s65, v39, 10
+; GCN-NEXT: v_readlane_b32 s64, v39, 9
+; GCN-NEXT: v_readlane_b32 s55, v39, 8
+; GCN-NEXT: v_readlane_b32 s54, v39, 7
+; GCN-NEXT: v_readlane_b32 s53, v39, 6
+; GCN-NEXT: v_readlane_b32 s52, v39, 5
+; GCN-NEXT: v_readlane_b32 s51, v39, 4
+; GCN-NEXT: v_readlane_b32 s50, v39, 3
+; GCN-NEXT: v_readlane_b32 s49, v39, 2
+; GCN-NEXT: v_readlane_b32 s48, v39, 1
; GCN-NEXT: v_readlane_b32 s39, v39, 0
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s4, v0
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s34, v0
+; GCN-NEXT: v_readlane_b32 s4, v39, 32
; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -647,81 +577,46 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
; GCN-NEXT: s_add_i32 s5, s33, 0x42100
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: s_add_i32 s5, s33, 0x42200
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
-; GCN-NEXT: v_mov_b32_e32 v0, s34
-; GCN-NEXT: s_add_i32 s5, s33, 0x42300
+; GCN-NEXT: v_writelane_b32 v39, s4, 32
+; GCN-NEXT: v_writelane_b32 v39, s34, 33
; GCN-NEXT: s_mov_b32 s34, s32
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4
; GCN-NEXT: v_writelane_b32 v39, s39, 0
-; GCN-NEXT: v_writelane_b32 v39, s40, 1
-; GCN-NEXT: v_writelane_b32 v39, s41, 2
-; GCN-NEXT: v_writelane_b32 v39, s42, 3
-; GCN-NEXT: v_writelane_b32 v39, s43, 4
-; GCN-NEXT: v_writelane_b32 v39, s44, 5
-; GCN-NEXT: v_writelane_b32 v39, s45, 6
-; GCN-NEXT: v_writelane_b32 v39, s46, 7
-; GCN-NEXT: v_writelane_b32 v39, s47, 8
-; GCN-NEXT: v_writelane_b32 v39, s48, 9
-; GCN-NEXT: v_writelane_b32 v39, s49, 10
-; GCN-NEXT: v_writelane_b32 v39, s50, 11
-; GCN-NEXT: v_writelane_b32 v39, s51, 12
-; GCN-NEXT: v_writelane_b32 v39, s52, 13
-; GCN-NEXT: v_writelane_b32 v39, s53, 14
-; GCN-NEXT: v_writelane_b32 v39, s54, 15
-; GCN-NEXT: v_writelane_b32 v39, s55, 16
-; GCN-NEXT: v_writelane_b32 v39, s56, 17
-; GCN-NEXT: v_writelane_b32 v39, s57, 18
-; GCN-NEXT: v_writelane_b32 v39, s58, 19
-; GCN-NEXT: v_writelane_b32 v39, s59, 20
-; GCN-NEXT: v_writelane_b32 v39, s60, 21
-; GCN-NEXT: v_writelane_b32 v39, s61, 22
-; GCN-NEXT: v_writelane_b32 v39, s62, 23
-; GCN-NEXT: v_writelane_b32 v39, s63, 24
-; GCN-NEXT: v_writelane_b32 v39, s64, 25
-; GCN-NEXT: v_writelane_b32 v39, s65, 26
-; GCN-NEXT: v_writelane_b32 v39, s66, 27
-; GCN-NEXT: v_writelane_b32 v39, s67, 28
-; GCN-NEXT: v_writelane_b32 v39, s68, 29
-; GCN-NEXT: v_writelane_b32 v39, s69, 30
-; GCN-NEXT: v_writelane_b32 v39, s70, 31
-; GCN-NEXT: v_writelane_b32 v39, s71, 32
-; GCN-NEXT: v_writelane_b32 v39, s72, 33
-; GCN-NEXT: v_writelane_b32 v39, s73, 34
-; GCN-NEXT: v_writelane_b32 v39, s74, 35
-; GCN-NEXT: v_writelane_b32 v39, s75, 36
-; GCN-NEXT: v_writelane_b32 v39, s76, 37
-; GCN-NEXT: v_writelane_b32 v39, s77, 38
-; GCN-NEXT: v_writelane_b32 v39, s78, 39
-; GCN-NEXT: v_writelane_b32 v39, s79, 40
-; GCN-NEXT: v_writelane_b32 v39, s80, 41
-; GCN-NEXT: v_writelane_b32 v39, s81, 42
-; GCN-NEXT: v_writelane_b32 v39, s82, 43
-; GCN-NEXT: v_writelane_b32 v39, s83, 44
-; GCN-NEXT: v_writelane_b32 v39, s84, 45
-; GCN-NEXT: v_writelane_b32 v39, s85, 46
-; GCN-NEXT: v_writelane_b32 v39, s86, 47
-; GCN-NEXT: v_writelane_b32 v39, s87, 48
-; GCN-NEXT: v_writelane_b32 v39, s88, 49
-; GCN-NEXT: v_writelane_b32 v39, s89, 50
-; GCN-NEXT: v_writelane_b32 v39, s90, 51
-; GCN-NEXT: v_writelane_b32 v39, s91, 52
-; GCN-NEXT: v_writelane_b32 v39, s92, 53
-; GCN-NEXT: v_writelane_b32 v39, s93, 54
-; GCN-NEXT: v_writelane_b32 v39, s94, 55
-; GCN-NEXT: v_writelane_b32 v39, s95, 56
-; GCN-NEXT: v_writelane_b32 v39, s96, 57
-; GCN-NEXT: v_writelane_b32 v39, s97, 58
-; GCN-NEXT: v_writelane_b32 v39, s98, 59
-; GCN-NEXT: v_writelane_b32 v39, s99, 60
-; GCN-NEXT: v_writelane_b32 v39, s100, 61
-; GCN-NEXT: v_writelane_b32 v39, s101, 62
+; GCN-NEXT: v_writelane_b32 v39, s48, 1
+; GCN-NEXT: v_writelane_b32 v39, s49, 2
+; GCN-NEXT: v_writelane_b32 v39, s50, 3
+; GCN-NEXT: v_writelane_b32 v39, s51, 4
+; GCN-NEXT: v_writelane_b32 v39, s52, 5
+; GCN-NEXT: v_writelane_b32 v39, s53, 6
+; GCN-NEXT: v_writelane_b32 v39, s54, 7
+; GCN-NEXT: v_writelane_b32 v39, s55, 8
+; GCN-NEXT: v_writelane_b32 v39, s64, 9
+; GCN-NEXT: v_writelane_b32 v39, s65, 10
+; GCN-NEXT: v_writelane_b32 v39, s66, 11
+; GCN-NEXT: v_writelane_b32 v39, s67, 12
+; GCN-NEXT: v_writelane_b32 v39, s68, 13
+; GCN-NEXT: v_writelane_b32 v39, s69, 14
+; GCN-NEXT: v_writelane_b32 v39, s70, 15
+; GCN-NEXT: v_writelane_b32 v39, s71, 16
+; GCN-NEXT: v_writelane_b32 v39, s80, 17
+; GCN-NEXT: v_writelane_b32 v39, s81, 18
+; GCN-NEXT: v_writelane_b32 v39, s82, 19
+; GCN-NEXT: v_writelane_b32 v39, s83, 20
+; GCN-NEXT: v_writelane_b32 v39, s84, 21
+; GCN-NEXT: v_writelane_b32 v39, s85, 22
+; GCN-NEXT: v_writelane_b32 v39, s86, 23
+; GCN-NEXT: v_writelane_b32 v39, s87, 24
+; GCN-NEXT: v_writelane_b32 v39, s96, 25
+; GCN-NEXT: v_writelane_b32 v39, s97, 26
+; GCN-NEXT: v_writelane_b32 v39, s98, 27
+; GCN-NEXT: v_writelane_b32 v39, s99, 28
+; GCN-NEXT: v_writelane_b32 v39, s100, 29
+; GCN-NEXT: v_writelane_b32 v39, s101, 30
; GCN-NEXT: v_mov_b32_e32 v1, 0x1080
-; GCN-NEXT: v_writelane_b32 v39, s102, 63
; GCN-NEXT: s_add_i32 s32, s32, 0x46000
+; GCN-NEXT: v_writelane_b32 v39, s102, 31
; GCN-NEXT: s_mov_b32 s32, s34
+; GCN-NEXT: v_readlane_b32 s34, v39, 33
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -731,78 +626,39 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; clobber all VGPRs
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_add_i32 s5, s33, 0x42200
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GCN-NEXT: s_add_i32 s5, s33, 0x42300
-; GCN-NEXT: v_readlane_b32 s102, v39, 63
-; GCN-NEXT: v_readlane_b32 s101, v39, 62
-; GCN-NEXT: v_readlane_b32 s100, v39, 61
-; GCN-NEXT: v_readlane_b32 s99, v39, 60
-; GCN-NEXT: v_readlane_b32 s98, v39, 59
-; GCN-NEXT: v_readlane_b32 s97, v39, 58
-; GCN-NEXT: v_readlane_b32 s96, v39, 57
-; GCN-NEXT: v_readlane_b32 s95, v39, 56
-; GCN-NEXT: v_readlane_b32 s94, v39, 55
-; GCN-NEXT: v_readlane_b32 s93, v39, 54
-; GCN-NEXT: v_readlane_b32 s92, v39, 53
-; GCN-NEXT: v_readlane_b32 s91, v39, 52
-; GCN-NEXT: v_readlane_b32 s90, v39, 51
-; GCN-NEXT: v_readlane_b32 s89, v39, 50
-; GCN-NEXT: v_readlane_b32 s88, v39, 49
-; GCN-NEXT: v_readlane_b32 s87, v39, 48
-; GCN-NEXT: v_readlane_b32 s86, v39, 47
-; GCN-NEXT: v_readlane_b32 s85, v39, 46
-; GCN-NEXT: v_readlane_b32 s84, v39, 45
-; GCN-NEXT: v_readlane_b32 s83, v39, 44
-; GCN-NEXT: v_readlane_b32 s82, v39, 43
-; GCN-NEXT: v_readlane_b32 s81, v39, 42
-; GCN-NEXT: v_readlane_b32 s80, v39, 41
-; GCN-NEXT: v_readlane_b32 s79, v39, 40
-; GCN-NEXT: v_readlane_b32 s78, v39, 39
-; GCN-NEXT: v_readlane_b32 s77, v39, 38
-; GCN-NEXT: v_readlane_b32 s76, v39, 37
-; GCN-NEXT: v_readlane_b32 s75, v39, 36
-; GCN-NEXT: v_readlane_b32 s74, v39, 35
-; GCN-NEXT: v_readlane_b32 s73, v39, 34
-; GCN-NEXT: v_readlane_b32 s72, v39, 33
-; GCN-NEXT: v_readlane_b32 s71, v39, 32
-; GCN-NEXT: v_readlane_b32 s70, v39, 31
-; GCN-NEXT: v_readlane_b32 s69, v39, 30
-; GCN-NEXT: v_readlane_b32 s68, v39, 29
-; GCN-NEXT: v_readlane_b32 s67, v39, 28
-; GCN-NEXT: v_readlane_b32 s66, v39, 27
-; GCN-NEXT: v_readlane_b32 s65, v39, 26
-; GCN-NEXT: v_readlane_b32 s64, v39, 25
-; GCN-NEXT: v_readlane_b32 s63, v39, 24
-; GCN-NEXT: v_readlane_b32 s62, v39, 23
-; GCN-NEXT: v_readlane_b32 s61, v39, 22
-; GCN-NEXT: v_readlane_b32 s60, v39, 21
-; GCN-NEXT: v_readlane_b32 s59, v39, 20
-; GCN-NEXT: v_readlane_b32 s58, v39, 19
-; GCN-NEXT: v_readlane_b32 s57, v39, 18
-; GCN-NEXT: v_readlane_b32 s56, v39, 17
-; GCN-NEXT: v_readlane_b32 s55, v39, 16
-; GCN-NEXT: v_readlane_b32 s54, v39, 15
-; GCN-NEXT: v_readlane_b32 s53, v39, 14
-; GCN-NEXT: v_readlane_b32 s52, v39, 13
-; GCN-NEXT: v_readlane_b32 s51, v39, 12
-; GCN-NEXT: v_readlane_b32 s50, v39, 11
-; GCN-NEXT: v_readlane_b32 s49, v39, 10
-; GCN-NEXT: v_readlane_b32 s48, v39, 9
-; GCN-NEXT: v_readlane_b32 s47, v39, 8
-; GCN-NEXT: v_readlane_b32 s46, v39, 7
-; GCN-NEXT: v_readlane_b32 s45, v39, 6
-; GCN-NEXT: v_readlane_b32 s44, v39, 5
-; GCN-NEXT: v_readlane_b32 s43, v39, 4
-; GCN-NEXT: v_readlane_b32 s42, v39, 3
-; GCN-NEXT: v_readlane_b32 s41, v39, 2
-; GCN-NEXT: v_readlane_b32 s40, v39, 1
+; GCN-NEXT: v_readlane_b32 s102, v39, 31
+; GCN-NEXT: v_readlane_b32 s101, v39, 30
+; GCN-NEXT: v_readlane_b32 s100, v39, 29
+; GCN-NEXT: v_readlane_b32 s99, v39, 28
+; GCN-NEXT: v_readlane_b32 s98, v39, 27
+; GCN-NEXT: v_readlane_b32 s97, v39, 26
+; GCN-NEXT: v_readlane_b32 s96, v39, 25
+; GCN-NEXT: v_readlane_b32 s87, v39, 24
+; GCN-NEXT: v_readlane_b32 s86, v39, 23
+; GCN-NEXT: v_readlane_b32 s85, v39, 22
+; GCN-NEXT: v_readlane_b32 s84, v39, 21
+; GCN-NEXT: v_readlane_b32 s83, v39, 20
+; GCN-NEXT: v_readlane_b32 s82, v39, 19
+; GCN-NEXT: v_readlane_b32 s81, v39, 18
+; GCN-NEXT: v_readlane_b32 s80, v39, 17
+; GCN-NEXT: v_readlane_b32 s71, v39, 16
+; GCN-NEXT: v_readlane_b32 s70, v39, 15
+; GCN-NEXT: v_readlane_b32 s69, v39, 14
+; GCN-NEXT: v_readlane_b32 s68, v39, 13
+; GCN-NEXT: v_readlane_b32 s67, v39, 12
+; GCN-NEXT: v_readlane_b32 s66, v39, 11
+; GCN-NEXT: v_readlane_b32 s65, v39, 10
+; GCN-NEXT: v_readlane_b32 s64, v39, 9
+; GCN-NEXT: v_readlane_b32 s55, v39, 8
+; GCN-NEXT: v_readlane_b32 s54, v39, 7
+; GCN-NEXT: v_readlane_b32 s53, v39, 6
+; GCN-NEXT: v_readlane_b32 s52, v39, 5
+; GCN-NEXT: v_readlane_b32 s51, v39, 4
+; GCN-NEXT: v_readlane_b32 s50, v39, 3
+; GCN-NEXT: v_readlane_b32 s49, v39, 2
+; GCN-NEXT: v_readlane_b32 s48, v39, 1
; GCN-NEXT: v_readlane_b32 s39, v39, 0
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s4, v0
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s34, v0
+; GCN-NEXT: v_readlane_b32 s4, v39, 32
; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1
; GCN-NEXT: s_add_i32 s5, s33, 0x42100
; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s5 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index f7300c921a745..04da358a49bc3 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -32,14 +32,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-LABEL: kernel:
; GLOBALNESS1: ; %bb.0: ; %bb
; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0
+; GLOBALNESS1-NEXT: s_load_dwordx4 s[52:55], s[8:9], 0x0
; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14
; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[96:97]
+; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[52:53]
; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5]
; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20
@@ -49,7 +49,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400
-; GLOBALNESS1-NEXT: s_bitcmp1_b32 s98, 0
+; GLOBALNESS1-NEXT: s_bitcmp1_b32 s54, 0
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1]
; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0
@@ -68,7 +68,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS1-NEXT: s_mov_b64 s[46:47], s[8:9]
+; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9]
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
@@ -76,7 +76,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_writelane_b32 v56, s8, 0
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0
; GLOBALNESS1-NEXT: v_writelane_b32 v56, s9, 1
-; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3
+; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[70:71], 1, v3
; GLOBALNESS1-NEXT: s_mov_b32 s82, s16
; GLOBALNESS1-NEXT: s_mov_b32 s83, s15
; GLOBALNESS1-NEXT: s_mov_b32 s84, s14
@@ -102,8 +102,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 6
; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 7
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1
-; GLOBALNESS1-NEXT: v_writelane_b32 v56, s62, 8
-; GLOBALNESS1-NEXT: v_writelane_b32 v56, s63, 9
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s70, 8
+; GLOBALNESS1-NEXT: v_writelane_b32 v56, s71, 9
; GLOBALNESS1-NEXT: s_branch .LBB1_4
; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -127,10 +127,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0
; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1]
-; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40
+; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1]
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
@@ -144,17 +144,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[62:63]
+; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[70:71]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9
; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_cmp_lt_i32 s99, 1
+; GLOBALNESS1-NEXT: s_cmp_lt_i32 s55, 1
; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7
; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock12
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 1
+; GLOBALNESS1-NEXT: s_cmp_lg_u32 s55, 1
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_8
@@ -164,7 +164,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS1-NEXT: .LBB1_8: ; %LeafBlock
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 0
+; GLOBALNESS1-NEXT: s_cmp_lg_u32 s55, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25
@@ -176,10 +176,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3]
; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0
+; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[86:87], 0, v0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000
-; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[94:95]
+; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[86:87]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -238,33 +238,33 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_add_u32 s78, s46, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s79, s47, 0
+; GLOBALNESS1-NEXT: s_add_u32 s70, s38, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s71, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
-; GLOBALNESS1-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0
+; GLOBALNESS1-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79]
+; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55]
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79]
+; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off
-; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14
; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i
@@ -278,14 +278,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_branch .LBB1_3
; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[46:47], 0x0
-; GLOBALNESS1-NEXT: v_readlane_b32 s62, v56, 8
+; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0
+; GLOBALNESS1-NEXT: v_readlane_b32 s70, v56, 8
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
-; GLOBALNESS1-NEXT: v_readlane_b32 s63, v56, 9
+; GLOBALNESS1-NEXT: v_readlane_b32 s71, v56, 9
+; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
+; GLOBALNESS1-NEXT: s_mov_b32 s55, s7
; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53]
-; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[94:95]
+; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[86:87]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2
; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -310,8 +312,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32
; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i
-; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0
+; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
@@ -328,8 +330,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34
; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i
-; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0
+; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
@@ -346,14 +348,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-LABEL: kernel:
; GLOBALNESS0: ; %bb.0: ; %bb
; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GLOBALNESS0-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0
+; GLOBALNESS0-NEXT: s_load_dwordx4 s[52:55], s[8:9], 0x0
; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14
; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[96:97]
+; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[52:53]
; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5]
; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20
@@ -363,7 +365,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400
-; GLOBALNESS0-NEXT: s_bitcmp1_b32 s98, 0
+; GLOBALNESS0-NEXT: s_bitcmp1_b32 s54, 0
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1]
; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0
@@ -382,7 +384,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS0-NEXT: s_mov_b64 s[46:47], s[8:9]
+; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9]
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
@@ -390,9 +392,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_writelane_b32 v56, s8, 0
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0
; GLOBALNESS0-NEXT: v_writelane_b32 v56, s9, 1
-; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3
-; GLOBALNESS0-NEXT: s_mov_b32 s78, s16
-; GLOBALNESS0-NEXT: s_mov_b32 s79, s15
+; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[84:85], 1, v3
+; GLOBALNESS0-NEXT: s_mov_b32 s70, s16
+; GLOBALNESS0-NEXT: s_mov_b32 s71, s15
; GLOBALNESS0-NEXT: s_mov_b32 s82, s14
; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11]
; GLOBALNESS0-NEXT: s_mov_b32 s32, 0
@@ -416,8 +418,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 6
; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 7
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1
-; GLOBALNESS0-NEXT: v_writelane_b32 v56, s62, 8
-; GLOBALNESS0-NEXT: v_writelane_b32 v56, s63, 9
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s84, 8
+; GLOBALNESS0-NEXT: v_writelane_b32 v56, s85, 9
; GLOBALNESS0-NEXT: s_branch .LBB1_4
; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -441,10 +443,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0
; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1]
-; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40
+; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1]
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
@@ -453,22 +455,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[62:63]
+; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9
; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_cmp_lt_i32 s99, 1
+; GLOBALNESS0-NEXT: s_cmp_lt_i32 s55, 1
; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7
; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock12
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 1
+; GLOBALNESS0-NEXT: s_cmp_lg_u32 s55, 1
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_8
@@ -478,7 +480,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GLOBALNESS0-NEXT: .LBB1_8: ; %LeafBlock
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 0
+; GLOBALNESS0-NEXT: s_cmp_lg_u32 s55, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0
; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25
@@ -490,17 +492,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0
; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3]
; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0
+; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[86:87], 0, v0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0
; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000
-; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[94:95]
+; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[86:87]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26
; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 2
; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 3
-; GLOBALNESS0-NEXT: s_mov_b32 s83, s99
+; GLOBALNESS0-NEXT: s_mov_b32 s83, s55
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13
; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i
@@ -553,33 +555,33 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_add_u32 s84, s46, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s85, s47, 0
+; GLOBALNESS0-NEXT: s_add_u32 s84, s38, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s85, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
-; GLOBALNESS0-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0
+; GLOBALNESS0-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
-; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55]
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off
-; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63]
+; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14
; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i
@@ -593,14 +595,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_branch .LBB1_3
; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS0-NEXT: v_readlane_b32 s62, v56, 8
+; GLOBALNESS0-NEXT: v_readlane_b32 s84, v56, 8
; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0
-; GLOBALNESS0-NEXT: s_mov_b32 s99, s83
-; GLOBALNESS0-NEXT: v_readlane_b32 s63, v56, 9
+; GLOBALNESS0-NEXT: s_mov_b32 s55, s83
+; GLOBALNESS0-NEXT: v_readlane_b32 s85, v56, 9
; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53]
-; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[94:95]
+; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[86:87]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2
; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1
@@ -625,8 +627,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32
; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i
-; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0
+; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
@@ -634,8 +636,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
@@ -643,8 +645,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34
; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i
-; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0
+; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
@@ -652,8 +654,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
-; GLOBALNESS0-NEXT: s_mov_b32 s13, s79
-; GLOBALNESS0-NEXT: s_mov_b32 s14, s78
+; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
+; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock
diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
index edb1f74d738f5..0df2e651a15e1 100644
--- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
@@ -27,11 +27,11 @@ body: |
; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr0
; CHECK-NEXT: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $sgpr5 = S_MOV_B32 0
- ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1056964608
- ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr5
+ ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0
; CHECK-NEXT: renamable $sgpr8 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr5
@@ -43,46 +43,46 @@ body: |
; CHECK-NEXT: renamable $sgpr15 = COPY renamable $sgpr5
; CHECK-NEXT: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1
; CHECK-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr76_sgpr77_sgpr78_sgpr79 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1200
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: KILL killed renamable $sgpr0, renamable $sgpr1
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1264
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1328
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
- ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load (s256), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load (s256), addrspace 6)
; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1392
; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s256), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s256), addrspace 6)
; CHECK-NEXT: renamable $sgpr2 = S_MOV_B32 1456
; CHECK-NEXT: renamable $sgpr3 = COPY renamable $sgpr5
- ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load (s256), addrspace 6)
+ ; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load (s256), addrspace 6)
; CHECK-NEXT: renamable $sgpr4 = S_MOV_B32 1520
; CHECK-NEXT: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6)
; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6)
- ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
- ; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr64_sgpr65_sgpr66_sgpr67, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, killed renamable $sgpr68_sgpr69_sgpr70_sgpr71, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
+ ; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128))
; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7)
; CHECK-NEXT: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7
- ; CHECK-NEXT: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75
+ ; CHECK-NEXT: KILL killed renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55
; CHECK-NEXT: KILL killed renamable $vgpr5_vgpr6
; CHECK-NEXT: KILL killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: KILL killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
+ ; CHECK-NEXT: KILL killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; CHECK-NEXT: KILL killed renamable $sgpr8_sgpr9_sgpr10_sgpr11
; CHECK-NEXT: KILL killed renamable $vgpr0
; CHECK-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 killed $vgpr7, killed $vgpr8, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index 9afa0e2bb2dcd..f08e5be0fd742 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -20,16 +20,16 @@ define hidden void @widget() {
; GCN-NEXT: v_writelane_b32 v41, s35, 3
; GCN-NEXT: v_writelane_b32 v41, s36, 4
; GCN-NEXT: v_writelane_b32 v41, s37, 5
-; GCN-NEXT: v_writelane_b32 v41, s46, 6
-; GCN-NEXT: v_writelane_b32 v41, s47, 7
+; GCN-NEXT: v_writelane_b32 v41, s38, 6
+; GCN-NEXT: v_writelane_b32 v41, s39, 7
; GCN-NEXT: v_writelane_b32 v41, s48, 8
; GCN-NEXT: v_writelane_b32 v41, s49, 9
; GCN-NEXT: v_writelane_b32 v41, s50, 10
; GCN-NEXT: v_writelane_b32 v41, s51, 11
; GCN-NEXT: v_writelane_b32 v41, s52, 12
; GCN-NEXT: v_writelane_b32 v41, s53, 13
-; GCN-NEXT: v_writelane_b32 v41, s62, 14
-; GCN-NEXT: v_writelane_b32 v41, s63, 15
+; GCN-NEXT: v_writelane_b32 v41, s54, 14
+; GCN-NEXT: v_writelane_b32 v41, s55, 15
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_dword v0, v[0:1]
@@ -37,7 +37,7 @@ define hidden void @widget() {
; GCN-NEXT: s_mov_b64 s[16:17], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
-; GCN-NEXT: s_mov_b64 s[62:63], 0
+; GCN-NEXT: s_mov_b64 s[54:55], 0
; GCN-NEXT: s_mov_b64 s[18:19], 0
; GCN-NEXT: s_cbranch_vccz .LBB0_9
; GCN-NEXT: ; %bb.1: ; %Flow
@@ -52,7 +52,7 @@ define hidden void @widget() {
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
; GCN-NEXT: s_mov_b64 s[34:35], s[4:5]
; GCN-NEXT: s_mov_b64 s[36:37], s[6:7]
-; GCN-NEXT: s_mov_b64 s[46:47], s[8:9]
+; GCN-NEXT: s_mov_b64 s[38:39], s[8:9]
; GCN-NEXT: s_mov_b64 s[48:49], s[10:11]
; GCN-NEXT: s_mov_b32 s50, s12
; GCN-NEXT: s_mov_b32 s51, s13
@@ -67,15 +67,15 @@ define hidden void @widget() {
; GCN-NEXT: s_mov_b32 s15, s53
; GCN-NEXT: s_mov_b64 s[4:5], s[34:35]
; GCN-NEXT: s_mov_b64 s[6:7], s[36:37]
-; GCN-NEXT: s_mov_b64 s[8:9], s[46:47]
+; GCN-NEXT: s_mov_b64 s[8:9], s[38:39]
; GCN-NEXT: s_mov_b64 s[10:11], s[48:49]
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; GCN-NEXT: s_mov_b64 s[16:17], 0
-; GCN-NEXT: s_andn2_b64 s[18:19], s[62:63], exec
+; GCN-NEXT: s_andn2_b64 s[18:19], s[54:55], exec
; GCN-NEXT: s_and_b64 s[20:21], vcc, exec
-; GCN-NEXT: s_or_b64 s[62:63], s[18:19], s[20:21]
+; GCN-NEXT: s_or_b64 s[54:55], s[18:19], s[20:21]
; GCN-NEXT: .LBB0_4: ; %Flow2
-; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[62:63]
+; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[54:55]
; GCN-NEXT: s_xor_b64 s[18:19], exec, s[18:19]
; GCN-NEXT: s_cbranch_execz .LBB0_6
; GCN-NEXT: ; %bb.5: ; %bb12
@@ -93,16 +93,16 @@ define hidden void @widget() {
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock
-; GCN-NEXT: v_readlane_b32 s63, v41, 15
-; GCN-NEXT: v_readlane_b32 s62, v41, 14
+; GCN-NEXT: v_readlane_b32 s55, v41, 15
+; GCN-NEXT: v_readlane_b32 s54, v41, 14
; GCN-NEXT: v_readlane_b32 s53, v41, 13
; GCN-NEXT: v_readlane_b32 s52, v41, 12
; GCN-NEXT: v_readlane_b32 s51, v41, 11
; GCN-NEXT: v_readlane_b32 s50, v41, 10
; GCN-NEXT: v_readlane_b32 s49, v41, 9
; GCN-NEXT: v_readlane_b32 s48, v41, 8
-; GCN-NEXT: v_readlane_b32 s47, v41, 7
-; GCN-NEXT: v_readlane_b32 s46, v41, 6
+; GCN-NEXT: v_readlane_b32 s39, v41, 7
+; GCN-NEXT: v_readlane_b32 s38, v41, 6
; GCN-NEXT: v_readlane_b32 s37, v41, 5
; GCN-NEXT: v_readlane_b32 s36, v41, 4
; GCN-NEXT: v_readlane_b32 s35, v41, 3
@@ -119,7 +119,7 @@ define hidden void @widget() {
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .LBB0_9: ; %bb2
-; GCN-NEXT: v_cmp_eq_u32_e64 s[62:63], 21, v0
+; GCN-NEXT: v_cmp_eq_u32_e64 s[54:55], 21, v0
; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 21, v0
; GCN-NEXT: s_mov_b64 vcc, exec
; GCN-NEXT: s_cbranch_execnz .LBB0_2
@@ -272,34 +272,34 @@ define hidden void @blam() {
; GCN-NEXT: v_writelane_b32 v45, s35, 3
; GCN-NEXT: v_writelane_b32 v45, s36, 4
; GCN-NEXT: v_writelane_b32 v45, s37, 5
-; GCN-NEXT: v_writelane_b32 v45, s46, 6
-; GCN-NEXT: v_writelane_b32 v45, s47, 7
+; GCN-NEXT: v_writelane_b32 v45, s38, 6
+; GCN-NEXT: v_writelane_b32 v45, s39, 7
; GCN-NEXT: v_writelane_b32 v45, s48, 8
; GCN-NEXT: v_writelane_b32 v45, s49, 9
; GCN-NEXT: v_writelane_b32 v45, s50, 10
; GCN-NEXT: v_writelane_b32 v45, s51, 11
; GCN-NEXT: v_writelane_b32 v45, s52, 12
; GCN-NEXT: v_writelane_b32 v45, s53, 13
-; GCN-NEXT: v_writelane_b32 v45, s62, 14
-; GCN-NEXT: v_writelane_b32 v45, s63, 15
+; GCN-NEXT: v_writelane_b32 v45, s54, 14
+; GCN-NEXT: v_writelane_b32 v45, s55, 15
; GCN-NEXT: v_writelane_b32 v45, s64, 16
; GCN-NEXT: v_writelane_b32 v45, s65, 17
; GCN-NEXT: v_writelane_b32 v45, s66, 18
; GCN-NEXT: v_writelane_b32 v45, s67, 19
; GCN-NEXT: v_writelane_b32 v45, s68, 20
; GCN-NEXT: v_writelane_b32 v45, s69, 21
-; GCN-NEXT: v_writelane_b32 v45, s78, 22
-; GCN-NEXT: v_writelane_b32 v45, s79, 23
+; GCN-NEXT: v_writelane_b32 v45, s70, 22
+; GCN-NEXT: v_writelane_b32 v45, s71, 23
; GCN-NEXT: v_writelane_b32 v45, s80, 24
; GCN-NEXT: v_writelane_b32 v45, s81, 25
; GCN-NEXT: v_mov_b32_e32 v40, v31
-; GCN-NEXT: s_mov_b32 s62, s15
-; GCN-NEXT: s_mov_b32 s63, s14
+; GCN-NEXT: s_mov_b32 s54, s15
+; GCN-NEXT: s_mov_b32 s55, s14
; GCN-NEXT: s_mov_b32 s64, s13
; GCN-NEXT: s_mov_b32 s65, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT: s_mov_b64 s[46:47], s[6:7]
+; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[48:49], s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
@@ -329,7 +329,7 @@ define hidden void @blam() {
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0
; GCN-NEXT: s_mov_b64 s[4:5], -1
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT: s_xor_b64 s[78:79], exec, s[8:9]
+; GCN-NEXT: s_xor_b64 s[70:71], exec, s[8:9]
; GCN-NEXT: s_cbranch_execz .LBB1_12
; GCN-NEXT: ; %bb.3: ; %bb6
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -342,13 +342,13 @@ define hidden void @blam() {
; GCN-NEXT: s_add_u32 s16, s16, spam@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, spam@rel32@hi+12
; GCN-NEXT: s_mov_b64 s[4:5], s[48:49]
-; GCN-NEXT: s_mov_b64 s[6:7], s[46:47]
+; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
; GCN-NEXT: s_mov_b32 s12, s65
; GCN-NEXT: s_mov_b32 s13, s64
-; GCN-NEXT: s_mov_b32 s14, s63
-; GCN-NEXT: s_mov_b32 s15, s62
+; GCN-NEXT: s_mov_b32 s14, s55
+; GCN-NEXT: s_mov_b32 s15, s54
; GCN-NEXT: v_mov_b32_e32 v31, v40
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0
@@ -394,7 +394,7 @@ define hidden void @blam() {
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: .LBB1_12: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[78:79]
+; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[70:71]
; GCN-NEXT: s_cbranch_execz .LBB1_16
; GCN-NEXT: ; %bb.13: ; %bb8
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
@@ -429,24 +429,24 @@ define hidden void @blam() {
; GCN-NEXT: s_or_b64 exec, exec, s[66:67]
; GCN-NEXT: v_readlane_b32 s81, v45, 25
; GCN-NEXT: v_readlane_b32 s80, v45, 24
-; GCN-NEXT: v_readlane_b32 s79, v45, 23
-; GCN-NEXT: v_readlane_b32 s78, v45, 22
+; GCN-NEXT: v_readlane_b32 s71, v45, 23
+; GCN-NEXT: v_readlane_b32 s70, v45, 22
; GCN-NEXT: v_readlane_b32 s69, v45, 21
; GCN-NEXT: v_readlane_b32 s68, v45, 20
; GCN-NEXT: v_readlane_b32 s67, v45, 19
; GCN-NEXT: v_readlane_b32 s66, v45, 18
; GCN-NEXT: v_readlane_b32 s65, v45, 17
; GCN-NEXT: v_readlane_b32 s64, v45, 16
-; GCN-NEXT: v_readlane_b32 s63, v45, 15
-; GCN-NEXT: v_readlane_b32 s62, v45, 14
+; GCN-NEXT: v_readlane_b32 s55, v45, 15
+; GCN-NEXT: v_readlane_b32 s54, v45, 14
; GCN-NEXT: v_readlane_b32 s53, v45, 13
; GCN-NEXT: v_readlane_b32 s52, v45, 12
; GCN-NEXT: v_readlane_b32 s51, v45, 11
; GCN-NEXT: v_readlane_b32 s50, v45, 10
; GCN-NEXT: v_readlane_b32 s49, v45, 9
; GCN-NEXT: v_readlane_b32 s48, v45, 8
-; GCN-NEXT: v_readlane_b32 s47, v45, 7
-; GCN-NEXT: v_readlane_b32 s46, v45, 6
+; GCN-NEXT: v_readlane_b32 s39, v45, 7
+; GCN-NEXT: v_readlane_b32 s38, v45, 6
; GCN-NEXT: v_readlane_b32 s37, v45, 5
; GCN-NEXT: v_readlane_b32 s36, v45, 4
; GCN-NEXT: v_readlane_b32 s35, v45, 3
diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
index 670b7d7b8893b..1e815f76ee149 100644
--- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir
@@ -37,12 +37,12 @@ body: |
; MUBUF-LABEL: name: use_restore_frame_reg
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1
+ ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1
; MUBUF-NEXT: {{ $}}
- ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33
+ ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34
+ ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34
; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
@@ -57,54 +57,54 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39
+ ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.2:
- ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39
+ ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
- ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
+ ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41
+ ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40
; MUBUF-NEXT: S_ENDPGM 0
;
; FLATSCR-LABEL: name: use_restore_frame_reg
; FLATSCR: bb.0:
; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1
+ ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1
; FLATSCR-NEXT: {{ $}}
- ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33
+ ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
- ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34
+ ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34
; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc
; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
- ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40
- ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec
- ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
- ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40
- ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; FLATSCR-NEXT: $sgpr42 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr42, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr42 = S_BITSET0_B32 0, $sgpr42
+ ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr42, implicit $exec
+ ; FLATSCR-NEXT: $sgpr42 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc
+ ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr42, 0, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr42 = S_BITSET0_B32 0, $sgpr42
+ ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr42, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: bb.1:
; FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39
+ ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: S_NOP 0
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: bb.2:
- ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39
+ ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
- ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39
- ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38
+ ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41
+ ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40
; FLATSCR-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
index d0798b261abf0..2ee62d13fcc51 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll
@@ -33,16 +33,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX900-NEXT: v_writelane_b32 v63, s36, 0
; GFX900-NEXT: v_writelane_b32 v63, s37, 1
-; GFX900-NEXT: v_writelane_b32 v63, s46, 2
-; GFX900-NEXT: v_writelane_b32 v63, s47, 3
+; GFX900-NEXT: v_writelane_b32 v63, s38, 2
+; GFX900-NEXT: v_writelane_b32 v63, s39, 3
; GFX900-NEXT: v_writelane_b32 v63, s48, 4
; GFX900-NEXT: v_writelane_b32 v63, s49, 5
; GFX900-NEXT: v_writelane_b32 v63, s50, 6
; GFX900-NEXT: v_writelane_b32 v63, s51, 7
; GFX900-NEXT: v_writelane_b32 v63, s52, 8
; GFX900-NEXT: v_writelane_b32 v63, s53, 9
-; GFX900-NEXT: v_writelane_b32 v63, s62, 10
-; GFX900-NEXT: v_writelane_b32 v63, s63, 11
+; GFX900-NEXT: v_writelane_b32 v63, s54, 10
+; GFX900-NEXT: v_writelane_b32 v63, s55, 11
; GFX900-NEXT: v_writelane_b32 v63, s64, 12
; GFX900-NEXT: v_writelane_b32 v63, s65, 13
; GFX900-NEXT: v_writelane_b32 v63, s66, 14
@@ -148,16 +148,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX900-NEXT: v_readlane_b32 s66, v63, 14
; GFX900-NEXT: v_readlane_b32 s65, v63, 13
; GFX900-NEXT: v_readlane_b32 s64, v63, 12
-; GFX900-NEXT: v_readlane_b32 s63, v63, 11
-; GFX900-NEXT: v_readlane_b32 s62, v63, 10
+; GFX900-NEXT: v_readlane_b32 s55, v63, 11
+; GFX900-NEXT: v_readlane_b32 s54, v63, 10
; GFX900-NEXT: v_readlane_b32 s53, v63, 9
; GFX900-NEXT: v_readlane_b32 s52, v63, 8
; GFX900-NEXT: v_readlane_b32 s51, v63, 7
; GFX900-NEXT: v_readlane_b32 s50, v63, 6
; GFX900-NEXT: v_readlane_b32 s49, v63, 5
; GFX900-NEXT: v_readlane_b32 s48, v63, 4
-; GFX900-NEXT: v_readlane_b32 s47, v63, 3
-; GFX900-NEXT: v_readlane_b32 s46, v63, 2
+; GFX900-NEXT: v_readlane_b32 s39, v63, 3
+; GFX900-NEXT: v_readlane_b32 s38, v63, 2
; GFX900-NEXT: v_readlane_b32 s37, v63, 1
; GFX900-NEXT: v_readlane_b32 s36, v63, 0
; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -204,16 +204,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX906-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX906-NEXT: v_writelane_b32 v63, s36, 0
; GFX906-NEXT: v_writelane_b32 v63, s37, 1
-; GFX906-NEXT: v_writelane_b32 v63, s46, 2
-; GFX906-NEXT: v_writelane_b32 v63, s47, 3
+; GFX906-NEXT: v_writelane_b32 v63, s38, 2
+; GFX906-NEXT: v_writelane_b32 v63, s39, 3
; GFX906-NEXT: v_writelane_b32 v63, s48, 4
; GFX906-NEXT: v_writelane_b32 v63, s49, 5
; GFX906-NEXT: v_writelane_b32 v63, s50, 6
; GFX906-NEXT: v_writelane_b32 v63, s51, 7
; GFX906-NEXT: v_writelane_b32 v63, s52, 8
; GFX906-NEXT: v_writelane_b32 v63, s53, 9
-; GFX906-NEXT: v_writelane_b32 v63, s62, 10
-; GFX906-NEXT: v_writelane_b32 v63, s63, 11
+; GFX906-NEXT: v_writelane_b32 v63, s54, 10
+; GFX906-NEXT: v_writelane_b32 v63, s55, 11
; GFX906-NEXT: v_writelane_b32 v63, s64, 12
; GFX906-NEXT: v_writelane_b32 v63, s65, 13
; GFX906-NEXT: v_writelane_b32 v63, s66, 14
@@ -319,16 +319,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX906-NEXT: v_readlane_b32 s66, v63, 14
; GFX906-NEXT: v_readlane_b32 s65, v63, 13
; GFX906-NEXT: v_readlane_b32 s64, v63, 12
-; GFX906-NEXT: v_readlane_b32 s63, v63, 11
-; GFX906-NEXT: v_readlane_b32 s62, v63, 10
+; GFX906-NEXT: v_readlane_b32 s55, v63, 11
+; GFX906-NEXT: v_readlane_b32 s54, v63, 10
; GFX906-NEXT: v_readlane_b32 s53, v63, 9
; GFX906-NEXT: v_readlane_b32 s52, v63, 8
; GFX906-NEXT: v_readlane_b32 s51, v63, 7
; GFX906-NEXT: v_readlane_b32 s50, v63, 6
; GFX906-NEXT: v_readlane_b32 s49, v63, 5
; GFX906-NEXT: v_readlane_b32 s48, v63, 4
-; GFX906-NEXT: v_readlane_b32 s47, v63, 3
-; GFX906-NEXT: v_readlane_b32 s46, v63, 2
+; GFX906-NEXT: v_readlane_b32 s39, v63, 3
+; GFX906-NEXT: v_readlane_b32 s38, v63, 2
; GFX906-NEXT: v_readlane_b32 s37, v63, 1
; GFX906-NEXT: v_readlane_b32 s36, v63, 0
; GFX906-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -374,16 +374,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX908-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
; GFX908-NEXT: v_writelane_b32 v62, s36, 0
; GFX908-NEXT: v_writelane_b32 v62, s37, 1
-; GFX908-NEXT: v_writelane_b32 v62, s46, 2
-; GFX908-NEXT: v_writelane_b32 v62, s47, 3
+; GFX908-NEXT: v_writelane_b32 v62, s38, 2
+; GFX908-NEXT: v_writelane_b32 v62, s39, 3
; GFX908-NEXT: v_writelane_b32 v62, s48, 4
; GFX908-NEXT: v_writelane_b32 v62, s49, 5
; GFX908-NEXT: v_writelane_b32 v62, s50, 6
; GFX908-NEXT: v_writelane_b32 v62, s51, 7
; GFX908-NEXT: v_writelane_b32 v62, s52, 8
; GFX908-NEXT: v_writelane_b32 v62, s53, 9
-; GFX908-NEXT: v_writelane_b32 v62, s62, 10
-; GFX908-NEXT: v_writelane_b32 v62, s63, 11
+; GFX908-NEXT: v_writelane_b32 v62, s54, 10
+; GFX908-NEXT: v_writelane_b32 v62, s55, 11
; GFX908-NEXT: v_writelane_b32 v62, s64, 12
; GFX908-NEXT: v_writelane_b32 v62, s65, 13
; GFX908-NEXT: v_writelane_b32 v62, s66, 14
@@ -493,16 +493,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX908-NEXT: v_readlane_b32 s66, v62, 14
; GFX908-NEXT: v_readlane_b32 s65, v62, 13
; GFX908-NEXT: v_readlane_b32 s64, v62, 12
-; GFX908-NEXT: v_readlane_b32 s63, v62, 11
-; GFX908-NEXT: v_readlane_b32 s62, v62, 10
+; GFX908-NEXT: v_readlane_b32 s55, v62, 11
+; GFX908-NEXT: v_readlane_b32 s54, v62, 10
; GFX908-NEXT: v_readlane_b32 s53, v62, 9
; GFX908-NEXT: v_readlane_b32 s52, v62, 8
; GFX908-NEXT: v_readlane_b32 s51, v62, 7
; GFX908-NEXT: v_readlane_b32 s50, v62, 6
; GFX908-NEXT: v_readlane_b32 s49, v62, 5
; GFX908-NEXT: v_readlane_b32 s48, v62, 4
-; GFX908-NEXT: v_readlane_b32 s47, v62, 3
-; GFX908-NEXT: v_readlane_b32 s46, v62, 2
+; GFX908-NEXT: v_readlane_b32 s39, v62, 3
+; GFX908-NEXT: v_readlane_b32 s38, v62, 2
; GFX908-NEXT: v_readlane_b32 s37, v62, 1
; GFX908-NEXT: v_readlane_b32 s36, v62, 0
; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
@@ -548,16 +548,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX90a-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
; GFX90a-NEXT: v_writelane_b32 v63, s36, 0
; GFX90a-NEXT: v_writelane_b32 v63, s37, 1
-; GFX90a-NEXT: v_writelane_b32 v63, s46, 2
-; GFX90a-NEXT: v_writelane_b32 v63, s47, 3
+; GFX90a-NEXT: v_writelane_b32 v63, s38, 2
+; GFX90a-NEXT: v_writelane_b32 v63, s39, 3
; GFX90a-NEXT: v_writelane_b32 v63, s48, 4
; GFX90a-NEXT: v_writelane_b32 v63, s49, 5
; GFX90a-NEXT: v_writelane_b32 v63, s50, 6
; GFX90a-NEXT: v_writelane_b32 v63, s51, 7
; GFX90a-NEXT: v_writelane_b32 v63, s52, 8
; GFX90a-NEXT: v_writelane_b32 v63, s53, 9
-; GFX90a-NEXT: v_writelane_b32 v63, s62, 10
-; GFX90a-NEXT: v_writelane_b32 v63, s63, 11
+; GFX90a-NEXT: v_writelane_b32 v63, s54, 10
+; GFX90a-NEXT: v_writelane_b32 v63, s55, 11
; GFX90a-NEXT: v_writelane_b32 v63, s64, 12
; GFX90a-NEXT: v_writelane_b32 v63, s65, 13
; GFX90a-NEXT: v_writelane_b32 v63, s66, 14
@@ -663,16 +663,16 @@ define i32 @test_tuple(<16 x i64> %0) {
; GFX90a-NEXT: v_readlane_b32 s66, v63, 14
; GFX90a-NEXT: v_readlane_b32 s65, v63, 13
; GFX90a-NEXT: v_readlane_b32 s64, v63, 12
-; GFX90a-NEXT: v_readlane_b32 s63, v63, 11
-; GFX90a-NEXT: v_readlane_b32 s62, v63, 10
+; GFX90a-NEXT: v_readlane_b32 s55, v63, 11
+; GFX90a-NEXT: v_readlane_b32 s54, v63, 10
; GFX90a-NEXT: v_readlane_b32 s53, v63, 9
; GFX90a-NEXT: v_readlane_b32 s52, v63, 8
; GFX90a-NEXT: v_readlane_b32 s51, v63, 7
; GFX90a-NEXT: v_readlane_b32 s50, v63, 6
; GFX90a-NEXT: v_readlane_b32 s49, v63, 5
; GFX90a-NEXT: v_readlane_b32 s48, v63, 4
-; GFX90a-NEXT: v_readlane_b32 s47, v63, 3
-; GFX90a-NEXT: v_readlane_b32 s46, v63, 2
+; GFX90a-NEXT: v_readlane_b32 s39, v63, 3
+; GFX90a-NEXT: v_readlane_b32 s38, v63, 2
; GFX90a-NEXT: v_readlane_b32 s37, v63, 1
; GFX90a-NEXT: v_readlane_b32 s36, v63, 0
; GFX90a-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
More information about the llvm-commits
mailing list