[llvm] c2fc7f7 - Revert "[AMDGPU]Optimize SGPR spills (#93668)"
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 24 00:05:56 PDT 2024
Author: vg0204
Date: 2024-06-24T12:36:36+05:30
New Revision: c2fc7f75f67039bb1ed577bc0edbd699a850cd9d
URL: https://github.com/llvm/llvm-project/commit/c2fc7f75f67039bb1ed577bc0edbd699a850cd9d
DIFF: https://github.com/llvm/llvm-project/commit/c2fc7f75f67039bb1ed577bc0edbd699a850cd9d.diff
LOG: Revert "[AMDGPU]Optimize SGPR spills (#93668)"
This reverts commit 4b9112e88a998ce620e4683548f2afd17cc5fe95. A separate
issue (#96353) describing the problem has been opened to keep track of it.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2e7d5cdfc3fe9..3e21d8ee2e2a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1437,11 +1437,6 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// since FastRegAlloc does the replacements itself.
addPass(createVirtRegRewriter(false));
- // At this point, the sgpr-regalloc has been done and it is good to have the
- // stack slot coloring to try to optimize the SGPR spill stack indices before
- // attempting the custom SGPR spill lowering.
- addPass(&StackSlotColoringID);
-
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsID);
addPass(&SIPreAllocateWWMRegsID);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b4f1aaaa1705d..4c5e60c873bb9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1775,13 +1775,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
if (SpillToVGPR) {
- // Since stack slot coloring pass is trying to optimize SGPR spills,
- // VGPR lanes (mapped from spill stack slot) may be shared for SGPR
- // spills of different sizes. This accounts for number of VGPR lanes alloted
- // equal to the largest SGPR being spilled in them.
- assert(SB.NumSubRegs <= VGPRSpills.size() &&
- "Num of SGPRs spilled should be less than or equal to num of "
- "the VGPR lanes.");
+ assert(SB.NumSubRegs == VGPRSpills.size() &&
+ "Num of VGPR lanes should be equal to num of SGPRs spilled");
for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
Register SubReg =
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 85d20d1c0410c..08cf83fd2bd0f 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -366,12 +366,10 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: Virtual Register Rewriter
-; GCN-O1-NEXT: Stack Slot Coloring
; GCN-O1-NEXT: SI lower SGPR spill instructions
; GCN-O1-NEXT: Virtual Register Map
; GCN-O1-NEXT: Live Register Matrix
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
-; GCN-O1-NEXT: Live Stack Slot Analysis
; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: SI Lower WWM Copies
; GCN-O1-NEXT: GCN NSA Reassign
@@ -673,12 +671,10 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
-; GCN-O1-OPTS-NEXT: Stack Slot Coloring
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
; GCN-O1-OPTS-NEXT: Virtual Register Map
; GCN-O1-OPTS-NEXT: Live Register Matrix
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
-; GCN-O1-OPTS-NEXT: Live Stack Slot Analysis
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
@@ -986,12 +982,10 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: Virtual Register Rewriter
-; GCN-O2-NEXT: Stack Slot Coloring
; GCN-O2-NEXT: SI lower SGPR spill instructions
; GCN-O2-NEXT: Virtual Register Map
; GCN-O2-NEXT: Live Register Matrix
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
-; GCN-O2-NEXT: Live Stack Slot Analysis
; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: SI Lower WWM Copies
; GCN-O2-NEXT: GCN NSA Reassign
@@ -1311,12 +1305,10 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: Virtual Register Rewriter
-; GCN-O3-NEXT: Stack Slot Coloring
; GCN-O3-NEXT: SI lower SGPR spill instructions
; GCN-O3-NEXT: Virtual Register Map
; GCN-O3-NEXT: Live Register Matrix
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
-; GCN-O3-NEXT: Live Stack Slot Analysis
; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: SI Lower WWM Copies
; GCN-O3-NEXT: GCN NSA Reassign
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 25e9e09748c81..fbe34a3a3970b 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -221,15 +221,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX906-NEXT: ; def s29
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX906-NEXT: v_writelane_b32 v40, s21, 12
-; GFX906-NEXT: v_writelane_b32 v40, s22, 13
-; GFX906-NEXT: v_writelane_b32 v40, s23, 14
-; GFX906-NEXT: v_writelane_b32 v40, s24, 15
-; GFX906-NEXT: v_writelane_b32 v40, s25, 16
-; GFX906-NEXT: v_writelane_b32 v40, s26, 17
-; GFX906-NEXT: v_writelane_b32 v40, s27, 18
-; GFX906-NEXT: v_writelane_b32 v40, s28, 19
-; GFX906-NEXT: v_writelane_b32 v40, s29, 20
+; GFX906-NEXT: v_writelane_b32 v40, s21, 24
+; GFX906-NEXT: v_writelane_b32 v40, s22, 25
+; GFX906-NEXT: v_writelane_b32 v40, s23, 26
+; GFX906-NEXT: v_writelane_b32 v40, s24, 27
+; GFX906-NEXT: v_writelane_b32 v40, s25, 28
+; GFX906-NEXT: v_writelane_b32 v40, s26, 29
+; GFX906-NEXT: v_writelane_b32 v40, s27, 30
+; GFX906-NEXT: v_writelane_b32 v40, s28, 31
+; GFX906-NEXT: v_writelane_b32 v40, s29, 32
; GFX906-NEXT: v_readlane_b32 s4, v40, 10
; GFX906-NEXT: v_readlane_b32 s6, v40, 0
; GFX906-NEXT: v_readlane_b32 s8, v40, 8
@@ -249,39 +249,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
-; GFX906-NEXT: v_readlane_b32 s21, v40, 12
+; GFX906-NEXT: v_readlane_b32 s21, v40, 24
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s21
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s22, v40, 13
+; GFX906-NEXT: v_readlane_b32 s22, v40, 25
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s22
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s23, v40, 14
+; GFX906-NEXT: v_readlane_b32 s23, v40, 26
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s23
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s24, v40, 15
+; GFX906-NEXT: v_readlane_b32 s24, v40, 27
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s24
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s25, v40, 16
+; GFX906-NEXT: v_readlane_b32 s25, v40, 28
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s25
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s26, v40, 17
+; GFX906-NEXT: v_readlane_b32 s26, v40, 29
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s26
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s27, v40, 18
+; GFX906-NEXT: v_readlane_b32 s27, v40, 30
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s27
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s28, v40, 19
+; GFX906-NEXT: v_readlane_b32 s28, v40, 31
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s28
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s29, v40, 20
+; GFX906-NEXT: v_readlane_b32 s29, v40, 32
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s29
; GFX906-NEXT: ;;#ASMEND
@@ -602,15 +602,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: ; def s29
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX908-NEXT: v_writelane_b32 v40, s21, 12
-; GFX908-NEXT: v_writelane_b32 v40, s22, 13
-; GFX908-NEXT: v_writelane_b32 v40, s23, 14
-; GFX908-NEXT: v_writelane_b32 v40, s24, 15
-; GFX908-NEXT: v_writelane_b32 v40, s25, 16
-; GFX908-NEXT: v_writelane_b32 v40, s26, 17
-; GFX908-NEXT: v_writelane_b32 v40, s27, 18
-; GFX908-NEXT: v_writelane_b32 v40, s28, 19
-; GFX908-NEXT: v_writelane_b32 v40, s29, 20
+; GFX908-NEXT: v_writelane_b32 v40, s21, 24
+; GFX908-NEXT: v_writelane_b32 v40, s22, 25
+; GFX908-NEXT: v_writelane_b32 v40, s23, 26
+; GFX908-NEXT: v_writelane_b32 v40, s24, 27
+; GFX908-NEXT: v_writelane_b32 v40, s25, 28
+; GFX908-NEXT: v_writelane_b32 v40, s26, 29
+; GFX908-NEXT: v_writelane_b32 v40, s27, 30
+; GFX908-NEXT: v_writelane_b32 v40, s28, 31
+; GFX908-NEXT: v_writelane_b32 v40, s29, 32
; GFX908-NEXT: v_readlane_b32 s4, v40, 10
; GFX908-NEXT: v_readlane_b32 s6, v40, 0
; GFX908-NEXT: v_readlane_b32 s8, v40, 8
@@ -630,39 +630,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
-; GFX908-NEXT: v_readlane_b32 s21, v40, 12
+; GFX908-NEXT: v_readlane_b32 s21, v40, 24
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s21
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s22, v40, 13
+; GFX908-NEXT: v_readlane_b32 s22, v40, 25
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s22
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s23, v40, 14
+; GFX908-NEXT: v_readlane_b32 s23, v40, 26
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s23
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s24, v40, 15
+; GFX908-NEXT: v_readlane_b32 s24, v40, 27
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s24
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s25, v40, 16
+; GFX908-NEXT: v_readlane_b32 s25, v40, 28
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s25
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s26, v40, 17
+; GFX908-NEXT: v_readlane_b32 s26, v40, 29
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s26
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s27, v40, 18
+; GFX908-NEXT: v_readlane_b32 s27, v40, 30
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s27
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s28, v40, 19
+; GFX908-NEXT: v_readlane_b32 s28, v40, 31
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s28
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s29, v40, 20
+; GFX908-NEXT: v_readlane_b32 s29, v40, 32
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s29
; GFX908-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 04a9f3cb2d332..17a19116735e4 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -17,12 +17,10 @@
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
-; DEFAULT-NEXT: Stack Slot Coloring
; DEFAULT-NEXT: SI lower SGPR spill instructions
; DEFAULT-NEXT: Virtual Register Map
; DEFAULT-NEXT: Live Register Matrix
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
-; DEFAULT-NEXT: Live Stack Slot Analysis
; DEFAULT-NEXT: Greedy Register Allocator
; DEFAULT-NEXT: SI Lower WWM Copies
; DEFAULT-NEXT: GCN NSA Reassign
@@ -52,12 +50,10 @@
; BASIC-DEFAULT-NEXT: Live Register Matrix
; BASIC-DEFAULT-NEXT: Basic Register Allocator
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
-; BASIC-DEFAULT-NEXT: Stack Slot Coloring
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
; BASIC-DEFAULT-NEXT: Virtual Register Map
; BASIC-DEFAULT-NEXT: Live Register Matrix
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
-; BASIC-DEFAULT-NEXT: Live Stack Slot Analysis
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
@@ -73,12 +69,10 @@
; DEFAULT-BASIC: Greedy Register Allocator
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
-; DEFAULT-BASIC-NEXT: Stack Slot Coloring
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
; DEFAULT-BASIC-NEXT: Virtual Register Map
; DEFAULT-BASIC-NEXT: Live Register Matrix
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
-; DEFAULT-BASIC-NEXT: Live Stack Slot Analysis
; DEFAULT-BASIC-NEXT: Basic Register Allocator
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
@@ -96,12 +90,10 @@
; BASIC-BASIC-NEXT: Live Register Matrix
; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: Virtual Register Rewriter
-; BASIC-BASIC-NEXT: Stack Slot Coloring
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
; BASIC-BASIC-NEXT: Virtual Register Map
; BASIC-BASIC-NEXT: Live Register Matrix
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
-; BASIC-BASIC-NEXT: Live Stack Slot Analysis
; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: SI Lower WWM Copies
; BASIC-BASIC-NEXT: GCN NSA Reassign
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 0527036e67498..887e9c4b5dc5e 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -2,7 +2,6 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stress-regalloc=3 -start-before=greedy,0 -stop-after=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILLED %s
# INFO : The test starts from the sgpr-regalloc pipeline.
-# INFO : Now, StackSlotColoring pass comes just after sgpr-regalloc pipeline.
# This file contains 3 test cases to observe the optimal stack slot usage for SGPR spills utilizing Stack Slot Coloring pass.
# @stack-slot-share-equal-sized-spills : In this, the stack slot indices is shared among the spill stack objects of equal size.
@@ -42,19 +41,20 @@ body: |
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr0_sgpr1
; SGPR_SPILLED-NEXT: $sgpr1 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 6, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 7, [[DEF]], implicit $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
- ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
- ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
- ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6, implicit-def $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
%0:sreg_32_xm0 = COPY $sgpr32
%5:sreg_64 = COPY $sgpr0_sgpr1
@@ -107,17 +107,18 @@ body: |
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 5, [[DEF]]
; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]]
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 6, [[DEF]]
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
- ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
- ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
%0:sreg_32_xm0 = COPY $sgpr32
%5:sreg_64 = COPY $sgpr2_sgpr3
@@ -162,25 +163,26 @@ body: |
; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr
; SGPR_SPILLED-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) 4, target-flags(amdgpu-rel32-hi) 4, implicit-def dead $scc
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 3, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 4, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 3, [[DEF]], implicit killed $sgpr4_sgpr5
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
- ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
- ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0
; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 4, [[DEF]]
; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 5, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 6, [[DEF]], implicit $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
; SGPR_SPILLED-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
- ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3, implicit-def $sgpr4_sgpr5
- ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2, implicit-def $sgpr4_sgpr5
+ ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit killed $vgpr0
- ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
- ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3
- ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2
+ ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5, implicit-def $sgpr2_sgpr3
+ ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
; SGPR_SPILLED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
%0:sreg_32_xm0 = COPY $sgpr32
%5:sreg_32 = COPY $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index e1bd1523d78a4..bea2e6d4b45a3 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -10098,7 +10098,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 8, v0
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:240
-; GFX6-NEXT: s_mov_b32 s2, 0x86a00
+; GFX6-NEXT: s_mov_b32 s2, 0x84400
; GFX6-NEXT: s_mov_b64 s[8:9], exec
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
@@ -10108,7 +10108,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:224
-; GFX6-NEXT: s_mov_b32 s2, 0x86600
+; GFX6-NEXT: s_mov_b32 s2, 0x84000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10117,7 +10117,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:208
-; GFX6-NEXT: s_mov_b32 s2, 0x86200
+; GFX6-NEXT: s_mov_b32 s2, 0x83c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10126,7 +10126,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:192
-; GFX6-NEXT: s_mov_b32 s2, 0x85e00
+; GFX6-NEXT: s_mov_b32 s2, 0x83800
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10135,7 +10135,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:176
-; GFX6-NEXT: s_mov_b32 s2, 0x85a00
+; GFX6-NEXT: s_mov_b32 s2, 0x83400
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10144,7 +10144,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:160
-; GFX6-NEXT: s_mov_b32 s2, 0x85600
+; GFX6-NEXT: s_mov_b32 s2, 0x83000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10153,7 +10153,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:144
-; GFX6-NEXT: s_mov_b32 s2, 0x85200
+; GFX6-NEXT: s_mov_b32 s2, 0x82c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10162,7 +10162,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:128
-; GFX6-NEXT: s_mov_b32 s2, 0x84e00
+; GFX6-NEXT: s_mov_b32 s2, 0x82800
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10171,7 +10171,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:112
-; GFX6-NEXT: s_mov_b32 s2, 0x84a00
+; GFX6-NEXT: s_mov_b32 s2, 0x82400
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10180,7 +10180,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:96
-; GFX6-NEXT: s_mov_b32 s2, 0x84600
+; GFX6-NEXT: s_mov_b32 s2, 0x82000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10189,7 +10189,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:80
-; GFX6-NEXT: s_mov_b32 s2, 0x84200
+; GFX6-NEXT: s_mov_b32 s2, 0x81c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10198,7 +10198,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64 offset:64
-; GFX6-NEXT: s_mov_b32 s2, 0x83a00
+; GFX6-NEXT: s_mov_b32 s2, 0x81400
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10208,7 +10208,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[4:7], 0 addr64
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:16
-; GFX6-NEXT: s_mov_b32 s2, 0x83200
+; GFX6-NEXT: s_mov_b32 s2, 0x80c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10217,7 +10217,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:32
-; GFX6-NEXT: s_mov_b32 s2, 0x83600
+; GFX6-NEXT: s_mov_b32 s2, 0x81000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10239,7 +10239,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[8:9]
; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[5:6], s[4:7], 0 addr64 offset:48
-; GFX6-NEXT: s_mov_b32 s0, 0x83e00
+; GFX6-NEXT: s_mov_b32 s0, 0x81800
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10266,7 +10266,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: v_writelane_b32 v4, s9, 5
; GFX6-NEXT: v_writelane_b32 v4, s10, 6
; GFX6-NEXT: v_writelane_b32 v4, s11, 7
-; GFX6-NEXT: s_mov_b32 s2, 0x80c00
+; GFX6-NEXT: s_mov_b32 s2, 0x84800
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
@@ -10307,7 +10307,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: v_writelane_b32 v4, s13, 5
; GFX6-NEXT: v_writelane_b32 v4, s14, 6
; GFX6-NEXT: v_writelane_b32 v4, s15, 7
-; GFX6-NEXT: s_mov_b32 s34, 0x81400
+; GFX6-NEXT: s_mov_b32 s34, 0x85000
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
@@ -10316,7 +10316,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT: s_mov_b32 s34, 0x80c00
+; GFX6-NEXT: s_mov_b32 s34, 0x84800
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10343,7 +10343,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: v_writelane_b32 v4, s21, 5
; GFX6-NEXT: v_writelane_b32 v4, s22, 6
; GFX6-NEXT: v_writelane_b32 v4, s23, 7
-; GFX6-NEXT: s_mov_b32 s34, 0x81c00
+; GFX6-NEXT: s_mov_b32 s34, 0x85800
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
@@ -10352,7 +10352,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT: s_mov_b32 s34, 0x81400
+; GFX6-NEXT: s_mov_b32 s34, 0x85000
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10379,7 +10379,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: v_writelane_b32 v4, s29, 5
; GFX6-NEXT: v_writelane_b32 v4, s30, 6
; GFX6-NEXT: v_writelane_b32 v4, s31, 7
-; GFX6-NEXT: s_mov_b32 s34, 0x82400
+; GFX6-NEXT: s_mov_b32 s34, 0x86000
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
@@ -10388,7 +10388,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT: s_mov_b32 s34, 0x81c00
+; GFX6-NEXT: s_mov_b32 s34, 0x85800
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s34 ; 4-byte Folded Reload
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10411,7 +10411,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: v_writelane_b32 v4, s1, 1
; GFX6-NEXT: v_writelane_b32 v4, s2, 2
; GFX6-NEXT: v_writelane_b32 v4, s3, 3
-; GFX6-NEXT: s_mov_b32 s34, 0x82c00
+; GFX6-NEXT: s_mov_b32 s34, 0x86800
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s34 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
@@ -10423,7 +10423,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: v_writelane_b32 v4, s4, 0
; GFX6-NEXT: v_writelane_b32 v4, s5, 1
-; GFX6-NEXT: s_mov_b32 s2, 0x83000
+; GFX6-NEXT: s_mov_b32 s2, 0x86c00
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
@@ -10432,7 +10432,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[34:35], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT: s_mov_b32 s36, 0x82400
+; GFX6-NEXT: s_mov_b32 s36, 0x86000
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s36 ; 4-byte Folded Reload
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10450,7 +10450,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[34:35], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT: s_mov_b32 s44, 0x82c00
+; GFX6-NEXT: s_mov_b32 s44, 0x86800
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s44 ; 4-byte Folded Reload
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10464,7 +10464,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_mov_b64 s[44:45], exec
; GFX6-NEXT: s_mov_b64 exec, 3
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
-; GFX6-NEXT: v_mov_b32_e32 v7, 0x20c0
+; GFX6-NEXT: v_mov_b32_e32 v7, 0x21b0
; GFX6-NEXT: s_waitcnt expcnt(0)
; GFX6-NEXT: buffer_load_dword v4, v7, s[40:43], 0 offen ; 4-byte Folded Reload
; GFX6-NEXT: s_waitcnt vmcnt(0)
@@ -10521,13 +10521,13 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[4:5]
-; GFX6-NEXT: s_mov_b32 s0, 0x86a00
+; GFX6-NEXT: s_mov_b32 s0, 0x84400
; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s0 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
; GFX6-NEXT: s_mov_b64 s[38:39], s[2:3]
-; GFX6-NEXT: s_mov_b32 s0, 0x86600
+; GFX6-NEXT: s_mov_b32 s0, 0x84000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:240
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10535,7 +10535,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x86200
+; GFX6-NEXT: s_mov_b32 s0, 0x83c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:224
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10543,7 +10543,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x85e00
+; GFX6-NEXT: s_mov_b32 s0, 0x83800
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:208
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10551,7 +10551,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x85a00
+; GFX6-NEXT: s_mov_b32 s0, 0x83400
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:192
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10559,7 +10559,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x85600
+; GFX6-NEXT: s_mov_b32 s0, 0x83000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:176
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10567,7 +10567,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x85200
+; GFX6-NEXT: s_mov_b32 s0, 0x82c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:160
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10575,7 +10575,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x84e00
+; GFX6-NEXT: s_mov_b32 s0, 0x82800
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:144
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10583,7 +10583,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x84a00
+; GFX6-NEXT: s_mov_b32 s0, 0x82400
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:128
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10591,7 +10591,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x84600
+; GFX6-NEXT: s_mov_b32 s0, 0x82000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:112
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10599,7 +10599,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x84200
+; GFX6-NEXT: s_mov_b32 s0, 0x81c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:96
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10607,7 +10607,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x83a00
+; GFX6-NEXT: s_mov_b32 s0, 0x81400
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:80
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10615,7 +10615,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x83e00
+; GFX6-NEXT: s_mov_b32 s0, 0x81800
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:64
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10623,7 +10623,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x83600
+; GFX6-NEXT: s_mov_b32 s0, 0x81000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:48
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10631,7 +10631,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s0 offset:4 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s0 offset:8 ; 4-byte Folded Reload
; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], s0 offset:12 ; 4-byte Folded Reload
-; GFX6-NEXT: s_mov_b32 s0, 0x83200
+; GFX6-NEXT: s_mov_b32 s0, 0x80c00
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dwordx4 v[7:10], v[5:6], s[36:39], 0 addr64 offset:32
; GFX6-NEXT: s_waitcnt expcnt(0)
More information about the llvm-commits
mailing list