[llvm] [AMDGPU] Track physical VGPRs used for SGPR spills (PR #75573)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 15 00:59:19 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Carl Ritson (perlfu)
<details>
<summary>Changes</summary>
Physical VGPRs used for SGPR spills need to be tracked independently of the WWM reserved registers. The WWM reserved set also contains extra registers allocated during the WWM pre-allocation pass.
Without separate tracking, SGPR spills allocated after WWM pre-allocation can overlap with WWM register usage, e.g. if the frame pointer is spilled during prologue/epilogue insertion.
---
Full diff: https://github.com/llvm/llvm-project/pull/75573.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (+2-1)
- (modified) llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h (+1)
- (added) llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir (+228)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..e8142244b7db69 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -349,8 +349,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
MBB.addLiveIn(LaneVGPR);
MBB.sortUniqueLiveIns();
}
+ SpillPhysVGPRs.push_back(LaneVGPR);
} else {
- LaneVGPR = WWMReservedRegs.back();
+ LaneVGPR = SpillPhysVGPRs.back();
}
SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..dc63ae44c528db 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,6 +502,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
unsigned NumVirtualVGPRSpillLanes = 0;
unsigned NumPhysicalVGPRSpillLanes = 0;
SmallVector<Register, 2> SpillVGPRs;
+ SmallVector<Register, 2> SpillPhysVGPRs;
using WWMSpillsMap = MapVector<Register, int>;
// To track the registers used in instructions that can potentially modify the
// inactive lanes. The WWM instructions and the writelane instructions for
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
new file mode 100644
index 00000000000000..1473e667f894cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -0,0 +1,228 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-unknown-amdpal -mcpu=gfx1030 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN %s
+
+--- |
+ define amdgpu_gfx [13 x i32] @test_main() #0 {
+ ret [13 x i32] poison
+ }
+
+ attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" }
+...
+---
+
+name: test_main
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+ stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ hasSpilledVGPRs: false
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ sgprForEXECCopy: '$sgpr105'
+body: |
+ ; GCN-LABEL: name: test_main
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr0 = COPY $sgpr33
+ ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32
+ ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr3
+ ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
+ ; GCN-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr1
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr2
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr3
+ ; GCN-NEXT: $sgpr22 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5
+ ; GCN-NEXT: dead $vgpr4 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
+ ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
+ ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
+ ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
+ ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31
+ ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30
+ ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29
+ ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28
+ ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27
+ ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26
+ ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25
+ ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24
+ ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23
+ ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22
+ ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21
+ ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 20
+ ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19
+ ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18
+ ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17
+ ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16
+ ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15
+ ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14
+ ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13
+ ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12
+ ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11
+ ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10
+ ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9
+ ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8
+ ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7
+ ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6
+ ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5
+ ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4
+ ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3
+ ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2
+ ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
+ ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
+ ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 31
+ ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 30
+ ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 29
+ ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 28
+ ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 27
+ ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 26
+ ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 25
+ ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 24
+ ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 23
+ ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 22
+ ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 21
+ ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 20
+ ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 19
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 18
+ ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 17
+ ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 16
+ ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 15
+ ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 14
+ ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 13
+ ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 12
+ ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 11
+ ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 10
+ ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 9
+ ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 8
+ ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 7
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 6
+ ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 5
+ ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 4
+ ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+ ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2
+ ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
+ ; GCN-NEXT: KILL killed renamable $vgpr5
+ ; GCN-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
+ ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
+ ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
+ ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
+ ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
+ ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+ ; GCN-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
+ ; GCN-NEXT: $sgpr33 = COPY $sgpr0
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+
+ $sgpr22 = IMPLICIT_DEF
+ SI_SPILL_S32_SAVE $sgpr22, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+ %0:vgpr_32 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+
+ bb.1:
+ KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+
+ bb.2:
+ renamable $sgpr22 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+
+ bb.3:
+ S_ENDPGM 0
+
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/75573
More information about the llvm-commits
mailing list