[llvm] 6b6ae58 - [AMDGPU] Save fp/bp after csr saves
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 2 08:18:12 PST 2021
Author: Sebastian Neubauer
Date: 2021-02-02T17:17:54+01:00
New Revision: 6b6ae583cf873debf06e982062fcc7982c8a184a
URL: https://github.com/llvm/llvm-project/commit/6b6ae583cf873debf06e982062fcc7982c8a184a
DIFF: https://github.com/llvm/llvm-project/commit/6b6ae583cf873debf06e982062fcc7982c8a184a.diff
LOG: [AMDGPU] Save fp/bp after csr saves
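Saving callee-saved registers (CSRs) happens in whole wave mode. For that,
exec is saved to a free register, and the same register can be reused to
save the frame pointer (fp) or base pointer (bp). Therefore, the fp/bp save
needs to happen after the CSR saves, so that the two uses of the register
do not overlap.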
Differential Revision: https://reviews.llvm.org/D95861
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ac031f7d99c7..246587dff055 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -881,39 +881,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
TargetStackID::SGPRSpill;
}
- // Emit the copy if we need an FP, and are using a free SGPR to save it.
- if (FuncInfo->SGPRForFPSaveRestoreCopy) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
- .addReg(FramePtrReg)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // Emit the copy if we need a BP, and are using a free SGPR to save it.
- if (FuncInfo->SGPRForBPSaveRestoreCopy) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
- FuncInfo->SGPRForBPSaveRestoreCopy)
- .addReg(BasePtrReg)
- .setMIFlag(MachineInstr::FrameSetup);
- }
-
- // If a copy has been emitted for FP and/or BP, Make the SGPRs
- // used in the copy instructions live throughout the function.
- SmallVector<MCPhysReg, 2> TempSGPRs;
- if (FuncInfo->SGPRForFPSaveRestoreCopy)
- TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
-
- if (FuncInfo->SGPRForBPSaveRestoreCopy)
- TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
-
- if (!TempSGPRs.empty()) {
- for (MachineBasicBlock &MBB : MF) {
- for (MCPhysReg Reg : TempSGPRs)
- MBB.addLiveIn(Reg);
-
- MBB.sortUniqueLiveIns();
- }
- }
-
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
@@ -1007,6 +974,44 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(Spill[0].VGPR, RegState::Undef);
}
+ // Emit the copy if we need an FP, and are using a free SGPR to save it.
+ if (FuncInfo->SGPRForFPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
+ FuncInfo->SGPRForFPSaveRestoreCopy)
+ .addReg(FramePtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Emit the copy if we need a BP, and are using a free SGPR to save it.
+ if (FuncInfo->SGPRForBPSaveRestoreCopy) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
+ FuncInfo->SGPRForBPSaveRestoreCopy)
+ .addReg(BasePtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // If a copy has been emitted for FP and/or BP, Make the SGPRs
+ // used in the copy instructions live throughout the function.
+ SmallVector<MCPhysReg, 2> TempSGPRs;
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);
+
+ if (FuncInfo->SGPRForBPSaveRestoreCopy)
+ TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);
+
+ if (!TempSGPRs.empty()) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MCPhysReg Reg : TempSGPRs)
+ MBB.addLiveIn(Reg);
+
+ MBB.sortUniqueLiveIns();
+ }
+ if (!LiveRegs.empty()) {
+ LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
+ }
+ }
+
if (TRI.needsStackRealignment(MF)) {
HasFP = true;
const unsigned Alignment = MFI.getMaxAlign().value();
@@ -1015,8 +1020,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
- LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
- LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
}
Register ScratchSPReg = findScratchNonCalleeSaveRegister(
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 17762d22bd77..115826bda90b 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -597,6 +597,41 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
ret void
}
+; When flat-scratch is enabled, we save the FP to s0. At the same time,
+; the exec register is saved to s0 when saving CSR in the function prolog.
+; Make sure that the FP save happens after restoring exec from the same
+; register.
+; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_reg:
+; GCN-NOT: v_writelane_b32 v40, s33
+; FLATSCR: s_or_saveexec_b64 s[0:1], -1
+; FLATSCR: s_mov_b64 exec, s[0:1]
+; FLATSCR: s_mov_b32 s0, s33
+; FLATSCR: s_mov_b32 s33, s32
+; FLATSCR: s_mov_b32 s33, s0
+; FLATSCR: s_or_saveexec_b64 s[0:1], -1
+; GCN-NOT: v_readlane_b32 s33, v40
+; GCN: s_setpc_b64
+define void @callee_need_to_spill_fp_to_reg() #1 {
+ call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
+ "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
+ ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
+ ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
+ ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
+ ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
+ ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
+ ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
+ ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
+ ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
+ ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()
+
+ call void asm sideeffect "; clobber all VGPRs except CSR v40",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
+ ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+ ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+ ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()
+ ret void
+}
+
; If the size of the offset exceeds the MUBUF offset field we need another
; scratch VGPR to hold the offset.
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
More information about the llvm-commits
mailing list