[llvm] [AMDGPU] Track physical VGPRs used for SGPR spills (PR #75573)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 15 00:58:48 PST 2023
https://github.com/perlfu created https://github.com/llvm/llvm-project/pull/75573
Physical VGPRs used for SGPR spills need to be tracked independently of the WWM reserved registers. The WWM reserved set also contains extra registers allocated during the WWM pre-allocation pass.
Without separate tracking, SGPR spills allocated after WWM pre-allocation can overlap with WWM register usage, e.g. if the frame pointer is spilled during prologue/epilogue insertion.
>From bf0b3cd0f87e8c927547792f5fc169fe8747e3db Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Fri, 15 Dec 2023 17:52:16 +0900
Subject: [PATCH] [AMDGPU] Track physical VGPRs used for SGPR spills
Physical VGPRs used for SGPR spills need to be tracked independently
of the WWM reserved registers. The WWM reserved set also contains
extra registers allocated during the WWM pre-allocation pass.
Without separate tracking, SGPR spills allocated after WWM
pre-allocation can overlap with WWM register usage, e.g. if the
frame pointer is spilled during prologue/epilogue insertion.
---
.../Target/AMDGPU/SIMachineFunctionInfo.cpp | 3 +-
.../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 1 +
.../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 228 ++++++++++++++++++
3 files changed, 231 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..e8142244b7db69 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -349,8 +349,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
MBB.addLiveIn(LaneVGPR);
MBB.sortUniqueLiveIns();
}
+ SpillPhysVGPRs.push_back(LaneVGPR);
} else {
- LaneVGPR = WWMReservedRegs.back();
+ LaneVGPR = SpillPhysVGPRs.back();
}
SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..dc63ae44c528db 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,6 +502,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
unsigned NumVirtualVGPRSpillLanes = 0;
unsigned NumPhysicalVGPRSpillLanes = 0;
SmallVector<Register, 2> SpillVGPRs;
+ SmallVector<Register, 2> SpillPhysVGPRs;
using WWMSpillsMap = MapVector<Register, int>;
// To track the registers used in instructions that can potentially modify the
// inactive lanes. The WWM instructions and the writelane instructions for
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
new file mode 100644
index 00000000000000..1473e667f894cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -0,0 +1,228 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-unknown-amdpal -mcpu=gfx1030 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN %s
+
+--- |
+ define amdgpu_gfx [13 x i32] @test_main() #0 {
+ ret [13 x i32] poison
+ }
+
+ attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" }
+...
+---
+
+name: test_main
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+ stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ hasSpilledVGPRs: false
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ sgprForEXECCopy: '$sgpr105'
+body: |
+ ; GCN-LABEL: name: test_main
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr0 = COPY $sgpr33
+ ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32
+ ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr3
+ ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
+ ; GCN-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr1
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr2
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr3
+ ; GCN-NEXT: $sgpr22 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5
+ ; GCN-NEXT: dead $vgpr4 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
+ ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
+ ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
+ ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
+ ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31
+ ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30
+ ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29
+ ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28
+ ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27
+ ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26
+ ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25
+ ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24
+ ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23
+ ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22
+ ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21
+ ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 20
+ ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19
+ ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18
+ ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17
+ ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16
+ ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15
+ ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14
+ ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13
+ ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12
+ ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11
+ ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10
+ ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9
+ ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8
+ ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7
+ ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6
+ ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5
+ ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4
+ ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3
+ ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2
+ ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
+ ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
+ ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 31
+ ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 30
+ ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 29
+ ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 28
+ ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 27
+ ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 26
+ ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 25
+ ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 24
+ ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 23
+ ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 22
+ ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 21
+ ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 20
+ ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 19
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 18
+ ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 17
+ ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 16
+ ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 15
+ ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 14
+ ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 13
+ ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 12
+ ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 11
+ ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 10
+ ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 9
+ ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 8
+ ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 7
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 6
+ ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 5
+ ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 4
+ ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+ ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2
+ ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
+ ; GCN-NEXT: KILL killed renamable $vgpr5
+ ; GCN-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
+ ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
+ ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
+ ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
+ ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
+ ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+ ; GCN-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
+ ; GCN-NEXT: $sgpr33 = COPY $sgpr0
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+
+ $sgpr22 = IMPLICIT_DEF
+ SI_SPILL_S32_SAVE $sgpr22, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+ %0:vgpr_32 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+
+ bb.1:
+ KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+
+ bb.2:
+ renamable $sgpr22 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+
+ bb.3:
+ S_ENDPGM 0
+
+...
More information about the llvm-commits
mailing list