[llvm] [AMDGPU] Track physical VGPRs used for SGPR spills (PR #75573)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 01:41:20 PST 2023


https://github.com/perlfu updated https://github.com/llvm/llvm-project/pull/75573

>From cf516fe754d2956f9ea246bd5ac15523a0b60cb5 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Fri, 15 Dec 2023 17:52:16 +0900
Subject: [PATCH] [AMDGPU] Track physical VGPRs used for SGPR spills

Physical VGPRs used for SGPR spills need to be tracked independently
of WWM reserved registers.  The WWM reserved set contains extra
registers allocated during the WWM pre-allocation pass.

Without separate tracking, SGPR spills allocated after WWM
pre-allocation can overlap with WWM register usage, e.g. if the frame
pointer is spilled during prologue/epilogue insertion.
---
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp            | 3 ++-
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h              | 1 +
 llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..e8142244b7db69 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -349,8 +349,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
       MBB.addLiveIn(LaneVGPR);
       MBB.sortUniqueLiveIns();
     }
+    SpillPhysVGPRs.push_back(LaneVGPR);
   } else {
-    LaneVGPR = WWMReservedRegs.back();
+    LaneVGPR = SpillPhysVGPRs.back();
   }
 
   SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..dc63ae44c528db 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,6 +502,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
   unsigned NumVirtualVGPRSpillLanes = 0;
   unsigned NumPhysicalVGPRSpillLanes = 0;
   SmallVector<Register, 2> SpillVGPRs;
+  SmallVector<Register, 2> SpillPhysVGPRs;
   using WWMSpillsMap = MapVector<Register, int>;
   // To track the registers used in instructions that can potentially modify the
   // inactive lanes. The WWM instructions and the writelane instructions for
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 35e205561a4169..1473e667f894cd 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -39,7 +39,7 @@ body:             |
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
   ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr1
-  ; GCN-NEXT:   $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr4
+  ; GCN-NEXT:   $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr3
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
   ; GCN-NEXT:   renamable $vgpr5 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr1
@@ -198,7 +198,7 @@ body:             |
   ; GCN-NEXT:   $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
   ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
   ; GCN-NEXT:   KILL killed renamable $vgpr5
-  ; GCN-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 4
+  ; GCN-NEXT:   $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
   ; GCN-NEXT:   $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
   ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)



More information about the llvm-commits mailing list