[llvm] [AMDGPU] Track physical VGPRs used for SGPR spills (PR #75573)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 00:58:48 PST 2023


https://github.com/perlfu created https://github.com/llvm/llvm-project/pull/75573

Physical VGPRs used for SGPR spills need to be tracked independently of WWM reserved registers.  The WWM reserved set contains extra registers allocated during the WWM pre-allocation pass.

Without separate tracking, SGPR spills allocated after WWM pre-allocation can overlap with WWM register usage, e.g. if the frame pointer is spilled during prologue/epilogue insertion.

>From bf0b3cd0f87e8c927547792f5fc169fe8747e3db Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Fri, 15 Dec 2023 17:52:16 +0900
Subject: [PATCH] [AMDGPU] Track physical VGPRs used for SGPR spills

Physical VGPRs used for SGPR spills need to be tracked independently
of WWM reserved registers.  The WWM reserved set contains extra
registers allocated during the WWM pre-allocation pass.

Without separate tracking, SGPR spills allocated after WWM
pre-allocation can overlap with WWM register usage, e.g. if the
frame pointer is spilled during prologue/epilogue insertion.
---
 .../Target/AMDGPU/SIMachineFunctionInfo.cpp   |   3 +-
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |   1 +
 .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 228 ++++++++++++++++++
 3 files changed, 231 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..e8142244b7db69 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -349,8 +349,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
       MBB.addLiveIn(LaneVGPR);
       MBB.sortUniqueLiveIns();
     }
+    SpillPhysVGPRs.push_back(LaneVGPR);
   } else {
-    LaneVGPR = WWMReservedRegs.back();
+    LaneVGPR = SpillPhysVGPRs.back();
   }
 
   SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..dc63ae44c528db 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,6 +502,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   unsigned NumVirtualVGPRSpillLanes = 0;
   unsigned NumPhysicalVGPRSpillLanes = 0;
   SmallVector<Register, 2> SpillVGPRs;
+  SmallVector<Register, 2> SpillPhysVGPRs;
   using WWMSpillsMap = MapVector<Register, int>;
   // To track the registers used in instructions that can potentially modify the
   // inactive lanes. The WWM instructions and the writelane instructions for
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
new file mode 100644
index 00000000000000..1473e667f894cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -0,0 +1,228 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-unknown-amdpal -mcpu=gfx1030 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN %s
+
+--- |
+  define amdgpu_gfx [13 x i32] @test_main() #0 {
+    ret [13 x i32] poison
+  }
+
+  attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" }
+...
+---
+
+name:            test_main
+tracksRegLiveness: true
+frameInfo:
+  hasCalls:        true
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+      stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  hasSpilledSGPRs: true
+  hasSpilledVGPRs: false
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  sgprForEXECCopy: '$sgpr105'
+body:             |
+  ; GCN-LABEL: name: test_main
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr0 = COPY $sgpr33
+  ; GCN-NEXT:   $sgpr33 = frame-setup COPY $sgpr32
+  ; GCN-NEXT:   $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
+  ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr1
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr3
+  ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
+  ; GCN-NEXT:   renamable $vgpr5 = IMPLICIT_DEF
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr1
+  ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr1
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr2
+  ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr2
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr3
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr3
+  ; GCN-NEXT:   $sgpr22 = IMPLICIT_DEF
+  ; GCN-NEXT:   renamable $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5
+  ; GCN-NEXT:   dead $vgpr4 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT:   liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   successors: %bb.3(0x80000000)
+  ; GCN-NEXT:   liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.3:
+  ; GCN-NEXT:   liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
+  ; GCN-NEXT:   $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
+  ; GCN-NEXT:   $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
+  ; GCN-NEXT:   $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
+  ; GCN-NEXT:   $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31
+  ; GCN-NEXT:   $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30
+  ; GCN-NEXT:   $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29
+  ; GCN-NEXT:   $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28
+  ; GCN-NEXT:   $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27
+  ; GCN-NEXT:   $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26
+  ; GCN-NEXT:   $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25
+  ; GCN-NEXT:   $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24
+  ; GCN-NEXT:   $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23
+  ; GCN-NEXT:   $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22
+  ; GCN-NEXT:   $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21
+  ; GCN-NEXT:   $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 20
+  ; GCN-NEXT:   $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19
+  ; GCN-NEXT:   $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18
+  ; GCN-NEXT:   $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17
+  ; GCN-NEXT:   $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16
+  ; GCN-NEXT:   $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15
+  ; GCN-NEXT:   $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14
+  ; GCN-NEXT:   $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13
+  ; GCN-NEXT:   $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12
+  ; GCN-NEXT:   $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11
+  ; GCN-NEXT:   $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10
+  ; GCN-NEXT:   $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9
+  ; GCN-NEXT:   $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8
+  ; GCN-NEXT:   $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7
+  ; GCN-NEXT:   $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6
+  ; GCN-NEXT:   $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5
+  ; GCN-NEXT:   $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4
+  ; GCN-NEXT:   $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3
+  ; GCN-NEXT:   $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2
+  ; GCN-NEXT:   $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
+  ; GCN-NEXT:   $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
+  ; GCN-NEXT:   $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 31
+  ; GCN-NEXT:   $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 30
+  ; GCN-NEXT:   $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 29
+  ; GCN-NEXT:   $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 28
+  ; GCN-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 27
+  ; GCN-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 26
+  ; GCN-NEXT:   $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 25
+  ; GCN-NEXT:   $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 24
+  ; GCN-NEXT:   $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 23
+  ; GCN-NEXT:   $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 22
+  ; GCN-NEXT:   $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 21
+  ; GCN-NEXT:   $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 20
+  ; GCN-NEXT:   $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 19
+  ; GCN-NEXT:   $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 18
+  ; GCN-NEXT:   $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 17
+  ; GCN-NEXT:   $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 16
+  ; GCN-NEXT:   $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 15
+  ; GCN-NEXT:   $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 14
+  ; GCN-NEXT:   $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 13
+  ; GCN-NEXT:   $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 12
+  ; GCN-NEXT:   $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 11
+  ; GCN-NEXT:   $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 10
+  ; GCN-NEXT:   $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 9
+  ; GCN-NEXT:   $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 8
+  ; GCN-NEXT:   $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 7
+  ; GCN-NEXT:   $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 6
+  ; GCN-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 5
+  ; GCN-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 4
+  ; GCN-NEXT:   $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+  ; GCN-NEXT:   $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2
+  ; GCN-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
+  ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
+  ; GCN-NEXT:   KILL killed renamable $vgpr5
+  ; GCN-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
+  ; GCN-NEXT:   $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
+  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
+  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
+  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
+  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
+  ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr1
+  ; GCN-NEXT:   $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
+  ; GCN-NEXT:   $sgpr33 = COPY $sgpr0
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0
+
+    $sgpr22 = IMPLICIT_DEF
+    SI_SPILL_S32_SAVE $sgpr22, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+    %0:vgpr_32 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+
+  bb.1:
+    KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+
+  bb.2:
+    renamable $sgpr22 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+
+  bb.3:
+    S_ENDPGM 0
+
+...



More information about the llvm-commits mailing list