[llvm] d6fdbbc - AMDGPU: Add second emergency slot for SGPR to vmem for large frames
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 2 16:05:10 PST 2022
Author: Matt Arsenault
Date: 2022-02-02T19:05:05-05:00
New Revision: d6fdbbcace0b51c0096c5dbab6afb6449da21524
URL: https://github.com/llvm/llvm-project/commit/d6fdbbcace0b51c0096c5dbab6afb6449da21524
DIFF: https://github.com/llvm/llvm-project/commit/d6fdbbcace0b51c0096c5dbab6afb6449da21524.diff
LOG: AMDGPU: Add second emergency slot for SGPR to vmem for large frames
In a future change, we will sometimes use a VGPR offset for doing
spills to memory, in which case we need 2 free VGPRs to do the SGPR
spill. In most cases we could spill the VGPR along with the SGPR being
spilled, but we don't have any free lanes for SGPR_1024 in wave32, so
we could still potentially need a second scavenging slot.
Added:
llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
Modified:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 6078f4a0577ab..0169b752e9983 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1229,7 +1229,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
- FuncInfo->removeDeadFrameIndices(MFI);
+ // At this point we've already allocated all spilled SGPRs to VGPRs if we
+ // can. Any remaining SGPR spills will go to memory, so move them back to the
+ // default stack.
+ bool HaveSGPRToVMemSpill =
+ FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
@@ -1241,6 +1245,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// Add an emergency spill slot
RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
+
+ // If we are spilling SGPRs to memory with a large frame, we may need a
+ // second VGPR emergency frame index.
+ if (HaveSGPRToVMemSpill &&
+ allocateScavengingFrameIndexesNearIncomingSP(MF)) {
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 0fbdbef6fcce6..ddc19440c2558 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -321,7 +321,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// free frame index ids by the later pass(es) like "stack slot coloring"
// which in turn could mess-up with the book keeping of "frame index to VGPR
// lane".
- FuncInfo->removeDeadFrameIndices(MFI);
+ FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
MadeChange = true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index cca8565c9ff97..324a33c8b2ea6 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -402,7 +402,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
return Spill.FullyAllocated;
}
-void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
+bool SIMachineFunctionInfo::removeDeadFrameIndices(
+ MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
// Remove dead frame indices from function frame, however keep FP & BP since
// spills for them haven't been inserted yet. And also make sure to remove the
// frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
@@ -415,17 +416,28 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
}
}
- // All other SPGRs must be allocated on the default stack, so reset the stack
- // ID.
- for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
- ++i)
- if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
- MFI.setStackID(i, TargetStackID::Default);
+ bool HaveSGPRToMemory = false;
+
+ if (ResetSGPRSpillStackIDs) {
+ // All other SGPRs must be allocated on the default stack, so reset the
+ // stack ID.
+ for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
+ ++i) {
+ if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
+ if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
+ MFI.setStackID(i, TargetStackID::Default);
+ HaveSGPRToMemory = true;
+ }
+ }
+ }
+ }
for (auto &R : VGPRToAGPRSpills) {
if (R.second.IsDead)
MFI.RemoveStackObject(R.first);
}
+
+ return HaveSGPRToMemory;
}
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 8e821274bb770..6114a132f0f1e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -555,7 +555,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
- void removeDeadFrameIndices(MachineFrameInfo &MFI);
+
+ /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
+ /// to the default stack.
+ bool removeDeadFrameIndices(MachineFrameInfo &MFI,
+ bool ResetSGPRSpillStackIDs);
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
new file mode 100644
index 0000000000000..7f0017a5b12b2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -0,0 +1,54 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
+
+# Check that we allocate 2 emergency stack slots if we're spilling
+# SGPRs to memory and potentially have an offset larger than fits in
+# the addressing mode of the memory instructions.
+
+# CHECK-LABEL: name: test
+# CHECK: stack:
+# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+
+# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+# CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr0
+# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+
+
+# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+# CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr0, 0
+# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 1, size: 4096, alignment: 4 }
+machineFunctionInfo:
+ isEntryFunction: false
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+ hasSpilledSGPRs: true
+body: |
+ bb.0:
+ liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
+ S_CMP_EQ_U32 0, 0, implicit-def $scc
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
+...
More information about the llvm-commits
mailing list