[llvm] 0e9368c - [AMDGPU] Move frame pointer from s34 to s33
Scott Linder via llvm-commits
llvm-commits@lists.llvm.org
Thu Mar 19 12:40:56 PDT 2020
Author: Scott Linder
Date: 2020-03-19T15:35:16-04:00
New Revision: 0e9368cc8ca3f1b53926fb82906bc29190516c0f
URL: https://github.com/llvm/llvm-project/commit/0e9368cc8ca3f1b53926fb82906bc29190516c0f
DIFF: https://github.com/llvm/llvm-project/commit/0e9368cc8ca3f1b53926fb82906bc29190516c0f.diff
LOG: [AMDGPU] Move frame pointer from s34 to s33
Remove the gap left between the stack pointer (s32) and frame pointer
(s34) now that the scratch wave offset is no longer a part of the
calling convention ABI.
Update llvm/docs/AMDGPUUsage.rst to reflect the change.
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D75657
Added:
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
llvm/test/CodeGen/AMDGPU/cc-update.ll
llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
llvm/test/CodeGen/AMDGPU/nested-calls.ll
llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
llvm/test/CodeGen/AMDGPU/sibling-call.ll
llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
llvm/test/CodeGen/AMDGPU/stack-realign.ll
llvm/test/CodeGen/AMDGPU/wave32.ll
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 199d9549a2fd..0a39e2eb3212 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -6067,7 +6067,7 @@ Frame Pointer
+++++++++++++
If the kernel needs a frame pointer for the reasons defined in
-``SIFrameLowering`` then SGPR34 is used and is always set to ``0`` in the
+``SIFrameLowering`` then SGPR33 is used and is always set to ``0`` in the
kernel prolog. If a frame pointer is not required then all uses of the frame
pointer are replaced with immediate ``0`` offsets.
@@ -8897,7 +8897,7 @@ registers and some in memory.
The following is not part of the AMDGPU function calling convention but
describes how the AMDGPU implements function calls:
-1. SGPR34 is used as a frame pointer (FP) if necessary. Like the SP it is an
+1. SGPR33 is used as a frame pointer (FP) if necessary. Like the SP it is an
unswizzled scratch address. It is only needed if runtime sized ``alloca``
are used, or for the reasons defined in ``SIFrameLowering``.
2. Runtime stack alignment is not currently supported.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0e95475a1974..a48415c238a7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1950,7 +1950,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// finalized, because it does not rely on the known stack size, only
// properties like whether variable sized objects are present.
if (ST.getFrameLowering()->hasFP(MF)) {
- Info.setFrameOffsetReg(AMDGPU::SGPR34);
+ Info.setFrameOffsetReg(AMDGPU::SGPR33);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index d5ec8164956b..f17608ad972a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -81,7 +81,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
// TODO: Pick a high register, and shift down, similar to a kernel.
- FrameOffsetReg = AMDGPU::SGPR34;
+ FrameOffsetReg = AMDGPU::SGPR33;
StackPtrOffsetReg = AMDGPU::SGPR32;
ArgInfo.PrivateSegmentBuffer =
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index de0086495870..c184ce778fa8 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -13,14 +13,14 @@ define void @use_vcc() #1 {
}
; GCN-LABEL: {{^}}indirect_use_vcc:
-; GCN: v_writelane_b32 v32, s34, 2
+; GCN: v_writelane_b32 v32, s33, 2
; GCN: v_writelane_b32 v32, s30, 0
; GCN: v_writelane_b32 v32, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s4, v32, 0
; GCN: v_readlane_b32 s5, v32, 1
-; GCN: v_readlane_b32 s34, v32, 2
-; GCN: ; NumSgprs: 37
+; GCN: v_readlane_b32 s33, v32, 2
+; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 33
define void @indirect_use_vcc() #1 {
call void @use_vcc()
@@ -29,8 +29,8 @@ define void @indirect_use_vcc() #1 {
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
-; CI: ; NumSgprs: 39
-; VI-NOBUG: ; NumSgprs: 41
+; CI: ; NumSgprs: 38
+; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 33
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
@@ -48,8 +48,8 @@ define void @use_flat_scratch() #1 {
}
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
-; CI: ; NumSgprs: 39
-; VI: ; NumSgprs: 41
+; CI: ; NumSgprs: 38
+; VI: ; NumSgprs: 40
; GCN: ; NumVgprs: 33
define void @indirect_use_flat_scratch() #1 {
call void @use_flat_scratch()
@@ -58,8 +58,8 @@ define void @indirect_use_flat_scratch() #1 {
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
-; CI: ; NumSgprs: 39
-; VI-NOBUG: ; NumSgprs: 41
+; CI: ; NumSgprs: 38
+; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 33
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index 0f754385d294..ebfc93e23850 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -23,9 +23,9 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: buffer_store_dword
-; GCN: v_writelane_b32 v32, s34, 4
-; GCN: v_writelane_b32 v32, s36, 0
-; GCN: v_writelane_b32 v32, s37, 1
+; GCN: v_writelane_b32 v32, s33, 4
+; GCN: v_writelane_b32 v32, s34, 0
+; GCN: v_writelane_b32 v32, s35, 1
; GCN: v_writelane_b32 v32, s30, 2
; GCN: v_writelane_b32 v32, s31, 3
@@ -35,10 +35,10 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
; GCN-NEXT: s_swappc_b64
; GCN-DAG: v_readlane_b32 s4, v32, 2
; GCN-DAG: v_readlane_b32 s5, v32, 3
-; GCN: v_readlane_b32 s37, v32, 1
-; GCN: v_readlane_b32 s36, v32, 0
+; GCN: v_readlane_b32 s35, v32, 1
+; GCN: v_readlane_b32 s34, v32, 0
-; GCN: v_readlane_b32 s34, v32, 4
+; GCN: v_readlane_b32 s33, v32, 4
; GCN: buffer_load_dword
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
@@ -50,14 +50,14 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
; GCN: buffer_store_dword v32
-; GCN: v_writelane_b32 v32, s34, 4
+; GCN: v_writelane_b32 v32, s33, 4
-; GCN: s_mov_b32 s34, s32
+; GCN: s_mov_b32 s33, s32
; GCN: s_add_u32 s32, s32, 0x400
; GCN: s_swappc_b64
; GCN-NEXT: s_swappc_b64
-; GCN: v_readlane_b32 s34, v32, 4
+; GCN: v_readlane_b32 s33, v32, 4
; GCN: buffer_load_dword v32,
define void @test_func_call_external_void_funcx2() #0 {
call void @external_void_func_void()
@@ -125,6 +125,8 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace
ret void
}
+; FIXME: What is the expected behavior for reserved registers here?
+
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
@@ -146,8 +148,6 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(
ret void
}
-; FIXME: What is the expected behavior for reserved registers here?
-
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
; GCN-NOT: s34
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index c42cadbc80c5..e989ea07926d 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -12,9 +12,9 @@ define void @callee_no_stack() #0 {
; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_mov_b32 s4, s34
-; GCN-NEXT: s_mov_b32 s34, s32
-; GCN-NEXT: s_mov_b32 s34, s4
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_mov_b32 s33, s4
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_all() #1 {
ret void
@@ -46,13 +46,13 @@ define void @callee_with_stack() #0 {
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_mov_b32 s4, s34
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x200
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4{{$}}
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4{{$}}
; GCN-NEXT: s_sub_u32 s32, s32, 0x200
-; GCN-NEXT: s_mov_b32 s34, s4
+; GCN-NEXT: s_mov_b32 s33, s4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_all() #1 {
@@ -80,14 +80,14 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN: v_writelane_b32 [[CSR_VGPR]], s34, 2
-; GCN-DAG: s_mov_b32 s34, s32
+; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2
+; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
-; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s34{{$}}
+; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
; GCN: s_swappc_b64
@@ -95,7 +95,7 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]]
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
-; GCN-NEXT: v_readlane_b32 s34, [[CSR_VGPR]], 2
+; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
@@ -121,7 +121,7 @@ define void @callee_with_stack_and_call() #0 {
; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-DAG: s_add_u32 s32, s32, 0x400
-; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s34, [[FP_SPILL_LANE:[0-9]+]]
+; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]]
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
@@ -131,7 +131,7 @@ define void @callee_with_stack_and_call() #0 {
; GCN-DAG: v_readlane_b32 s5, v32, 1
; GCN: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34, [[CSR_VGPR]], [[FP_SPILL_LANE]]
+; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
@@ -204,20 +204,20 @@ define void @spill_only_csr_sgpr() {
; TODO: Can the SP inc/deec be remvoed?
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_csr_vgpr:
; GCN: s_waitcnt
-; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s34
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
-; GCN-DAG: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
-; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s34 offset:8
+; GCN-DAG: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:8
; GCN: ;;#ASMSTART
; GCN-NEXT: ; clobber v33
; GCN-NEXT: ;;#ASMEND
-; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN: s_add_u32 s32, s32, 0x300
; GCN-NEXT: s_sub_u32 s32, s32, 0x300
-; GCN-NEXT: s_mov_b32 s34, s4
+; GCN-NEXT: s_mov_b32 s33, s4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
@@ -230,17 +230,17 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
; GCN: s_waitcnt
-; GCN-NEXT: v_writelane_b32 v1, s34, 63
-; GCN-NEXT: s_mov_b32 s34, s32
-; GCN: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
+; GCN-NEXT: v_writelane_b32 v1, s33, 63
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-COUNT-63: v_writelane_b32 v1
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:8
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:8
; GCN: ;;#ASMSTART
; GCN-COUNT-63: v_readlane_b32 s{{[0-9]+}}, v1
; GCN: s_add_u32 s32, s32, 0x300
; GCN-NEXT: s_sub_u32 s32, s32, 0x300
-; GCN-NEXT: v_readlane_b32 s34, v1, 63
+; GCN-NEXT: v_readlane_b32 s33, v1, 63
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @last_lane_vgpr_for_fp_csr() #1 {
@@ -262,19 +262,19 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
; GCN: s_waitcnt
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s34
-; GCN-NEXT: s_mov_b32 s34, s32
-; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-COUNT-64: v_writelane_b32 v1,
; GCN: buffer_store_dword
; GCN: ;;#ASMSTART
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
-; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN: s_add_u32 s32, s32, 0x300
; GCN-NEXT: s_sub_u32 s32, s32, 0x300
-; GCN-NEXT: s_mov_b32 s34, [[FP_COPY]]
+; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @no_new_vgpr_for_fp_csr() #1 {
@@ -296,13 +296,13 @@ define void @no_new_vgpr_for_fp_csr() #1 {
; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
; GCN: s_waitcnt
; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0
-; GCN-NEXT: s_mov_b32 s4, s34
-; GCN-NEXT: s_and_b32 s34, [[SCRATCH]], 0xfff80000
+; GCN-NEXT: s_mov_b32 s4, s33
+; GCN-NEXT: s_and_b32 s33, [[SCRATCH]], 0xfff80000
; GCN-NEXT: s_add_u32 s32, s32, 0x100000
; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
-; GCN-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s34
+; GCN-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s33
; GCN-NEXT: s_sub_u32 s32, s32, 0x100000
-; GCN-NEXT: s_mov_b32 s34, s4
+; GCN-NEXT: s_mov_b32 s33, s4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @realign_stack_no_fp_elim() #1 {
@@ -313,18 +313,18 @@ define void @realign_stack_no_fp_elim() #1 {
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
; GCN: s_waitcnt
-; GCN-NEXT: v_writelane_b32 v1, s34, 2
+; GCN-NEXT: v_writelane_b32 v1, s33, 2
; GCN-NEXT: v_writelane_b32 v1, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GCN: v_writelane_b32 v1, s31, 1
-; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s34 offset:4
+; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
; GCN: ;;#ASMSTART
; GCN: v_readlane_b32 s4, v1, 0
; GCN-NEXT: s_add_u32 s32, s32, 0x200
; GCN-NEXT: v_readlane_b32 s5, v1, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x200
-; GCN-NEXT: v_readlane_b32 s34, v1, 2
+; GCN-NEXT: v_readlane_b32 s33, v1, 2
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
define void @no_unused_non_csr_sgpr_for_fp() #1 {
@@ -347,9 +347,9 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
; GCN-NEXT: v_writelane_b32 v32, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-DAG: v_writelane_b32 v32, s31, 1
; GCN-DAG: buffer_store_dword
@@ -360,7 +360,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 {
; GCN: v_readlane_b32 s4, v32, 0
; GCN-NEXT: v_readlane_b32 s5, v32, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x300{{$}}
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
@@ -394,9 +394,9 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008
; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
; GCN-NEXT: v_writelane_b32 v32, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-DAG: v_writelane_b32 v32, s31, 1
; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}}
; GCN-DAG: buffer_store_dword
@@ -406,7 +406,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
; GCN: v_readlane_b32 s4, v32, 0
; GCN-NEXT: v_readlane_b32 s5, v32, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x40300{{$}}
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008
; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Reload
@@ -444,13 +444,13 @@ define internal void @local_empty_func() #0 {
; An FP is needed, despite not needing any spills
; TODO: Ccould see callee does not use stack and omit FP.
; GCN-LABEL: {{^}}ipra_call_with_stack:
-; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s34
-; GCN: s_mov_b32 s34, s32
+; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN: s_mov_b32 s33, s32
; GCN: s_add_u32 s32, s32, 0x400
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x400
-; GCN: s_mov_b32 s34, [[FP_COPY:s[0-9]+]]
+; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]]
define void @ipra_call_with_stack() #0 {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 16916e4aa42e..d69f70ffad57 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -530,7 +530,7 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
}
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
-; GCN-DAG: s_mov_b32 s34, s32
+; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7]
@@ -550,7 +550,7 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
; GCN: s_swappc_b64
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; GCN-DAG: v_mov_b32_e32 v[[LO1:[0-9]+]], s[[LO_X]]
; GCN-DAG: v_mov_b32_e32 v[[HI1:[0-9]+]], s[[HI_X]]
; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO1]]:[[HI1]]{{\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 0dee34dcfdc8..265024e6bb8f 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -486,7 +486,7 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
}
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
-; VARABI: s_mov_b32 s34, s32
+; VARABI: s_mov_b32 s33, s32
; VARABI: buffer_store_dword v1, off, s[0:3], s32{{$}}
; Touching the workitem id register is not necessary.
@@ -514,14 +514,14 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
; Requires loading and storing to stack slot.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
-; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill
-; GCN-DAG: buffer_load_dword v32, off, s[0:3], s34{{$}}
+; GCN-DAG: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}
; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64
-; GCN: buffer_load_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN: s_sub_u32 s32, s32, 0x400{{$}}
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
@@ -664,8 +664,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; VARABI: buffer_store_dword [[K]], off, s[0:3], s34{{$}}
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}}
+; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
+; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
@@ -674,11 +674,11 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
; FIXED-ABI-NOT: v31
; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}}
-; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s34{{$}}
+; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}}
; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}}
; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}}
-; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}}
+; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
; FIXED-ABI-NOT: v31
; FIXEDABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll
index 2cd8d67b355f..edd65cbf79c9 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll
@@ -166,17 +166,17 @@ entry:
define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 {
; GFX803-LABEL: test_force_fp_kern_empty:
; GFX803: ; %bb.0: ; %entry
-; GFX803-NEXT: s_mov_b32 s34, 0
+; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_empty:
; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_mov_b32 s34, 0
+; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_empty:
; GFX1010: ; %bb.0: ; %entry
-; GFX1010-NEXT: s_mov_b32 s34, 0
+; GFX1010-NEXT: s_mov_b32 s33, 0
; GFX1010-NEXT: s_endpgm
entry:
ret void
@@ -188,11 +188,11 @@ define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
-; GFX803-NEXT: s_mov_b32 s34, 0
+; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_mov_b32_e32 v0, 0
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s5
-; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4
+; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT: s_endpgm
;
; GFX900-LABEL: test_force_fp_kern_stack:
@@ -200,16 +200,16 @@ define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX900-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT: s_add_u32 s0, s0, s7
-; GFX900-NEXT: s_mov_b32 s34, 0
+; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_addc_u32 s1, s1, 0
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4
+; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT: s_endpgm
;
; GFX1010-LABEL: test_force_fp_kern_stack:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s4, s4, s7
-; GFX1010-NEXT: s_mov_b32 s34, 0
+; GFX1010-NEXT: s_mov_b32 s33, 0
; GFX1010-NEXT: s_addc_u32 s5, s5, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
@@ -217,7 +217,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
; GFX1010-NEXT: s_addc_u32 s1, s1, 0
; GFX1010-NEXT: v_mov_b32_e32 v0, 0
; GFX1010-NEXT: ; implicit-def: $vcc_hi
-; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4
+; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT: s_endpgm
entry:
%x = alloca i32, align 4, addrspace(5)
@@ -237,7 +237,7 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX803-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX803-NEXT: s_mov_b32 s32, 0
-; GFX803-NEXT: s_mov_b32 s34, 0
+; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT: s_endpgm
;
@@ -251,7 +251,7 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX900-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX900-NEXT: s_mov_b32 s32, 0
-; GFX900-NEXT: s_mov_b32 s34, 0
+; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT: s_endpgm
;
@@ -259,7 +259,7 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s4, s4, s7
; GFX1010-NEXT: s_mov_b32 s32, 0
-; GFX1010-NEXT: s_mov_b32 s34, 0
+; GFX1010-NEXT: s_mov_b32 s33, 0
; GFX1010-NEXT: s_addc_u32 s5, s5, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
@@ -282,7 +282,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX803-NEXT: s_add_u32 s4, s4, s7
; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; GFX803-NEXT: s_add_u32 s0, s0, s7
-; GFX803-NEXT: s_mov_b32 s34, 0
+; GFX803-NEXT: s_mov_b32 s33, 0
; GFX803-NEXT: s_addc_u32 s1, s1, 0
; GFX803-NEXT: v_mov_b32_e32 v0, 0
; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s5
@@ -290,7 +290,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX803-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX803-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX803-NEXT: s_movk_i32 s32, 0x400
-; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4
+; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX803-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX803-NEXT: s_endpgm
;
@@ -300,13 +300,13 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX900-NEXT: s_add_u32 s0, s0, s7
; GFX900-NEXT: s_addc_u32 s1, s1, 0
-; GFX900-NEXT: s_mov_b32 s34, 0
+; GFX900-NEXT: s_mov_b32 s33, 0
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: s_getpc_b64 s[4:5]
; GFX900-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX900-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX900-NEXT: s_movk_i32 s32, 0x400
-; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4
+; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT: s_endpgm
;
@@ -314,7 +314,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_add_u32 s4, s4, s7
; GFX1010-NEXT: s_movk_i32 s32, 0x200
-; GFX1010-NEXT: s_mov_b32 s34, 0
+; GFX1010-NEXT: s_mov_b32 s33, 0
; GFX1010-NEXT: s_addc_u32 s5, s5, 0
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
@@ -325,7 +325,7 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
; GFX1010-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4
; GFX1010-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4
; GFX1010-NEXT: ; implicit-def: $vcc_hi
-; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4
+; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX1010-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX1010-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 0fc880251b2c..007ca13d53c9 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -30,9 +30,9 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
; GCN-NEXT: v_writelane_b32 v32, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4
@@ -42,7 +42,7 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: v_readlane_b32 s4, v32, 0
; GCN-NEXT: v_readlane_b32 s5, v32, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -64,9 +64,9 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
; GCN-NEXT: v_writelane_b32 v32, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4
@@ -76,7 +76,7 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: v_readlane_b32 s4, v32, 0
; GCN-NEXT: v_readlane_b32 s5, v32, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -98,9 +98,9 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
; GCN-NEXT: v_writelane_b32 v32, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4
@@ -110,7 +110,7 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: v_readlane_b32 s4, v32, 0
; GCN-NEXT: v_readlane_b32 s5, v32, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
@@ -132,9 +132,9 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
; GCN-NEXT: v_writelane_b32 v32, s30, 0
-; GCN-NEXT: s_mov_b32 s34, s32
+; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4
@@ -145,7 +145,7 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: v_readlane_b32 s5, v32, 1
; GCN-NEXT: v_mov_b32_e32 v1, v4
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 2b60004f3a8b..1d7a19f711be 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -219,10 +219,10 @@ declare void @func(<4 x float> addrspace(5)* nocapture) #0
; GCN-LABEL: {{^}}undefined_stack_store_reg:
; GCN: s_and_saveexec_b64
-; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
-; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
-; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:
+; GCN: buffer_store_dword v0, off, s[0:3], s33 offset:
+; GCN: buffer_store_dword v0, off, s[0:3], s33 offset:
+; GCN: buffer_store_dword v0, off, s[0:3], s33 offset:
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:
define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
bb:
%tmp = alloca <4 x float>, align 16, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
index 5cc4782c6062..12fa2d4d6872 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
@@ -30,7 +30,7 @@ machineFunctionInfo:
waveLimiter: true
scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
stackPtrOffsetReg: '$sgpr32'
- frameOffsetReg: '$sgpr34'
+ frameOffsetReg: '$sgpr33'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
dispatchPtr: { reg: '$sgpr4_sgpr5' }
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index 5f0b5aaecac9..3eb478896f55 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -189,18 +189,18 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v35, s34, 4
-; GFX9-NEXT: s_mov_b32 s34, s32
+; GFX9-NEXT: v_writelane_b32 v35, s33, 4
+; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_add_u32 s32, s32, 0x800
-; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s34 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill
-; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s34 ; 4-byte Folded Spill
-; GFX9-NEXT: v_writelane_b32 v35, s36, 0
+; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-NEXT: v_writelane_b32 v35, s34, 0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+4
-; GFX9-NEXT: v_writelane_b32 v35, s37, 1
-; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0
+; GFX9-NEXT: v_writelane_b32 v35, s35, 1
+; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v32, v1
; GFX9-NEXT: v_mov_b32_e32 v33, v0
; GFX9-NEXT: v_writelane_b32 v35, s30, 2
@@ -208,21 +208,21 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: v_writelane_b32 v35, s31, 3
; GFX9-NEXT: v_and_b32_e32 v34, 0xffffff, v32
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_mad_u32_u24 v32, v33, v32, v34
; GFX9-NEXT: v_mov_b32_e32 v0, v32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_add_u32_e32 v0, v32, v34
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s4, v35, 2
; GFX9-NEXT: v_readlane_b32 s5, v35, 3
-; GFX9-NEXT: v_readlane_b32 s37, v35, 1
-; GFX9-NEXT: v_readlane_b32 s36, v35, 0
-; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s34 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s34 offset:8 ; 4-byte Folded Reload
+; GFX9-NEXT: v_readlane_b32 s35, v35, 1
+; GFX9-NEXT: v_readlane_b32 s34, v35, 0
+; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: s_sub_u32 s32, s32, 0x800
-; GFX9-NEXT: v_readlane_b32 s34, v35, 4
+; GFX9-NEXT: v_readlane_b32 s33, v35, 4
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index fdbe3a25e64e..3e94a8e2f952 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -14,8 +14,8 @@ declare void @external_void_func_i32(i32) #0
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
-; GCN-DAG: v_writelane_b32 v32, s34, 2
-; GCN-DAG: s_mov_b32 s34, s32
+; GCN-DAG: v_writelane_b32 v32, s33, 2
+; GCN-DAG: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: v_writelane_b32 v32, s30, 0
; GCN-DAG: v_writelane_b32 v32, s31, 1
@@ -26,7 +26,7 @@ declare void @external_void_func_i32(i32) #0
; GCN: v_readlane_b32 s5, v32, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34, v32, 2
+; GCN-NEXT: v_readlane_b32 s33, v32, 2
; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
@@ -39,9 +39,9 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
; GCN: s_waitcnt
-; GCN: s_mov_b32 s34, s32
+; GCN: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}}
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x1400{{$}}
; GCN: s_setpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
index 67d62e0d5003..760b26b87dd9 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
@@ -19,7 +19,7 @@ stack:
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr34
+ frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
@@ -28,19 +28,19 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs
; CHECK: liveins: $vgpr1
- ; CHECK: $sgpr27 = frame-setup COPY $sgpr34
+ ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+ ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK: $sgpr34 = S_LSHR_B32 $sgpr34, 6, implicit-def $scc
- ; CHECK: $sgpr34 = S_ADD_U32 killed $sgpr34, 8192, implicit-def $scc
- ; CHECK: $vgpr2 = COPY killed $sgpr34
- ; CHECK: $sgpr34 = S_SUB_U32 killed $sgpr34, 8192, implicit-def $scc
- ; CHECK: $sgpr34 = S_LSHL_B32 $sgpr34, 6, implicit-def $scc
+ ; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc
+ ; CHECK: $vgpr2 = COPY killed $sgpr33
+ ; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc
+ ; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup COPY $sgpr27
+ ; CHECK: $sgpr33 = frame-setup COPY $sgpr27
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
@@ -61,7 +61,7 @@ stack:
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr34
+ frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
@@ -70,17 +70,17 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr
; CHECK: liveins: $vgpr1
- ; CHECK: $sgpr27 = frame-setup COPY $sgpr34
+ ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+ ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK: $sgpr29 = S_LSHR_B32 $sgpr34, 6, implicit-def $scc
+ ; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc
; CHECK: $vgpr2 = COPY killed $sgpr29
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup COPY $sgpr27
+ ; CHECK: $sgpr33 = frame-setup COPY $sgpr27
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31
@@ -101,7 +101,7 @@ stack:
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr34
+ frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
@@ -110,17 +110,17 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64
; CHECK: liveins: $vgpr1
- ; CHECK: $sgpr27 = frame-setup COPY $sgpr34
+ ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+ ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr34, implicit $exec
+ ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $sgpr28 = S_MOV_B32 8192
; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_I32_e64 killed $sgpr28, killed $vgpr3, 0, implicit $exec
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup COPY $sgpr27
+ ; CHECK: $sgpr33 = frame-setup COPY $sgpr27
; CHECK: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
@@ -140,7 +140,7 @@ stack:
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
- frameOffsetReg: $sgpr34
+ frameOffsetReg: $sgpr33
stackPtrOffsetReg: $sgpr32
body: |
@@ -149,17 +149,17 @@ body: |
; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc
; CHECK: liveins: $vgpr1
- ; CHECK: $sgpr27 = frame-setup COPY $sgpr34
+ ; CHECK: $sgpr27 = frame-setup COPY $sgpr33
; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
+ ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc
; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31
- ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr34, implicit $exec
+ ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK: $vcc_lo = S_MOV_B32 8192
; CHECK: $vgpr2, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
- ; CHECK: $sgpr34 = frame-setup COPY $sgpr27
+ ; CHECK: $sgpr33 = frame-setup COPY $sgpr27
; CHECK: S_ENDPGM 0
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31
$vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 508f08ed924b..0b9eec73e191 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -205,13 +205,13 @@ entry:
; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec
-; GCN: s_mov_b32 s34, s32
+; GCN: s_mov_b32 s33, s32
; GCN-DAG: s_add_u32 s32, s32, 0x400
-; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill
-; GCN-DAG: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill
-; GCN-DAG: v_writelane_b32 v34, s36, 0
-; GCN-DAG: v_writelane_b32 v34, s37, 1
+; GCN-DAG: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-DAG: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-DAG: v_writelane_b32 v34, s34, 0
+; GCN-DAG: v_writelane_b32 v34, s35, 1
; GCN-DAG: s_getpc_b64 s[4:5]
; GCN-DAG: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4
@@ -220,18 +220,18 @@ entry:
; GCN: s_swappc_b64
-; GCN-DAG: v_readlane_b32 s36, v34, 0
-; GCN-DAG: v_readlane_b32 s37, v34, 1
+; GCN-DAG: v_readlane_b32 s34, v34, 0
+; GCN-DAG: v_readlane_b32 s35, v34, 1
-; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload
-; GCN: buffer_load_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+4
; GCN: s_sub_u32 s32, s32, 0x400
-; GCN-NEXT: v_readlane_b32 s34,
+; GCN-NEXT: v_readlane_b32 s33,
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
index 69a4d7eac9ea..c98e344b5009 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
@@ -4,13 +4,13 @@
; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec
-; GCN: v_writelane_b32 v32, s34, 2
+; GCN: v_writelane_b32 v32, s33, 2
; GCN: s_swappc_b64
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9
-; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}}
+; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; GCN: v_readlane_b32 s34, v32, 2
+; GCN: v_readlane_b32 s33, v32, 2
; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN: s_mov_b64 exec
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index 022cc1c7d28f..2a3cfe7a0992 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -33,7 +33,7 @@ define void @needs_align16_default_stack_align(i32 %idx) #0 {
; GCN-LABEL: {{^}}needs_align16_stack_align4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
-; GCN: s_and_b32 s34, [[SCRATCH_REG]], 0xfffffc00
+; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffffc00
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
@@ -54,7 +54,7 @@ define void @needs_align16_stack_align4(i32 %idx) #2 {
; GCN-LABEL: {{^}}needs_align32:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
-; GCN: s_and_b32 s34, [[SCRATCH_REG]], 0xfffff800
+; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffff800
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
@@ -75,7 +75,7 @@ define void @needs_align32(i32 %idx) #0 {
; GCN-LABEL: {{^}}force_realign4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
-; GCN: s_and_b32 s34, [[SCRATCH_REG]], 0xffffff00
+; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00
; GCN: s_add_u32 s32, s32, 0xd00{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
@@ -125,13 +125,13 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {
; GCN-LABEL: {{^}}default_realign_align128:
; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
-; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s34
-; GCN-NEXT: s_and_b32 s34, [[TMP]], 0xffffe000
+; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
+; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000
; GCN-NEXT: s_add_u32 s32, s32, 0x4000
-; GCN-NOT: s34
-; GCN: buffer_store_dword v0, off, s[0:3], s34{{$}}
+; GCN-NOT: s33
+; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
; GCN: s_sub_u32 s32, s32, 0x4000
-; GCN: s_mov_b32 s34, [[FP_COPY]]
+; GCN: s_mov_b32 s33, [[FP_COPY]]
define void @default_realign_align128(i32 %idx) #0 {
%alloca.align = alloca i32, align 128, addrspace(5)
store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index e3149be899c0..f9d4e3a5abad 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1063,8 +1063,8 @@ declare void @external_void_func_void() #1
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
-; GCN-NEXT: v_writelane_b32 v32, s34, 2
-; GCN: s_mov_b32 s34, s32
+; GCN-NEXT: v_writelane_b32 v32, s33, 2
+; GCN: s_mov_b32 s33, s32
; GFX1064: s_add_u32 s32, s32, 0x400
; GFX1032: s_add_u32 s32, s32, 0x200
@@ -1078,7 +1078,7 @@ declare void @external_void_func_void() #1
; GFX1064: s_sub_u32 s32, s32, 0x400
; GFX1032: s_sub_u32 s32, s32, 0x200
-; GCN: v_readlane_b32 s34, v32, 2
+; GCN: v_readlane_b32 s33, v32, 2
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index 95a70cfb33d0..975a4ea19af4 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -77,7 +77,7 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
-; CHECK-NEXT: frameOffsetReg: '$sgpr34'
+; CHECK-NEXT: frameOffsetReg: '$sgpr33'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
@@ -104,7 +104,7 @@ define void @function() {
; CHECK-NEXT: memoryBound: false
; CHECK-NEXT: waveLimiter: false
; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
-; CHECK-NEXT: frameOffsetReg: '$sgpr34'
+; CHECK-NEXT: frameOffsetReg: '$sgpr33'
; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32'
; CHECK-NEXT: argumentInfo:
; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
More information about the llvm-commits
mailing list