[llvm] [ARM] Optimise non-ABI frame pointers (PR #110286)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 08:31:10 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Oliver Stannard (ostannard)
<details>
<summary>Changes</summary>
With -fomit-frame-pointer, even if we set up a frame pointer for other
reasons (e.g. variable-sized or over-aligned stack allocations), we
don't need to create an ABI-compliant frame record. This means that we
can save all of the general-purpose registers in one push, instead of
splitting it to ensure that the frame pointer and link register are
adjacent on the stack, saving two instructions per function.
The first 7 patches here are also included in #<!-- -->110283 and #<!-- -->110285, so the NFC and bug-fix parts can be reviewed separately.
---
Patch is 107.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110286.diff
19 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp (+18-11)
- (modified) llvm/lib/Target/ARM/ARMBaseRegisterInfo.h (-74)
- (modified) llvm/lib/Target/ARM/ARMCallingConv.td (+11-8)
- (modified) llvm/lib/Target/ARM/ARMFrameLowering.cpp (+361-238)
- (modified) llvm/lib/Target/ARM/ARMFrameLowering.h (+3-4)
- (modified) llvm/lib/Target/ARM/ARMSubtarget.cpp (+35-5)
- (modified) llvm/lib/Target/ARM/ARMSubtarget.h (+44-13)
- (modified) llvm/lib/Target/ARM/Thumb1FrameLowering.cpp (+5-8)
- (modified) llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll (+27-20)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll (+97-63)
- (added) llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll (+390)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll (+21-15)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll (+67-59)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll (+106-92)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll (+38-26)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll (+3-6)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll (+45-28)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll (+51-34)
- (modified) llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll (+84-26)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index c149db3144c7c2..d2e91b02e55b50 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -62,27 +62,30 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
- bool UseSplitPush = STI.splitFramePushPop(*MF);
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(*MF);
const Function &F = MF->getFunction();
if (F.getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
- } else if (STI.splitFramePointerPush(*MF)) {
+ } else if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
return CSR_Win_SplitFP_SaveList;
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
} else if (F.getCallingConv() == CallingConv::SwiftTail) {
- return STI.isTargetDarwin()
- ? CSR_iOS_SwiftTail_SaveList
- : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
- : CSR_AAPCS_SwiftTail_SaveList);
+ return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList
+ : (PushPopSplit == ARMSubtarget::SplitR7
+ ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
+ : CSR_AAPCS_SwiftTail_SaveList);
} else if (F.hasFnAttribute("interrupt")) {
if (STI.isMClass()) {
// M-class CPUs have hardware which saves the registers needed to allow a
// function conforming to the AAPCS to function as a handler.
- return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList;
+ return PushPopSplit == ARMSubtarget::SplitR7
+ ? CSR_ATPCS_SplitPush_SaveList
+ : CSR_AAPCS_SaveList;
} else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
// Fast interrupt mode gives the handler a private copy of R8-R14, so less
// need to be saved to restore user-mode state.
@@ -99,8 +102,9 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isTargetDarwin())
return CSR_iOS_SwiftError_SaveList;
- return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList :
- CSR_AAPCS_SwiftError_SaveList;
+ return PushPopSplit == ARMSubtarget::SplitR7
+ ? CSR_ATPCS_SplitPush_SwiftError_SaveList
+ : CSR_AAPCS_SwiftError_SaveList;
}
if (STI.isTargetDarwin() && F.getCallingConv() == CallingConv::CXX_FAST_TLS)
@@ -111,10 +115,13 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (STI.isTargetDarwin())
return CSR_iOS_SaveList;
- if (UseSplitPush)
- return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
+ if (PushPopSplit == ARMSubtarget::SplitR7)
+ return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
: CSR_ATPCS_SplitPush_SaveList;
+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
+ return CSR_AAPCS_SplitPush_R11_SaveList;
+
return CSR_AAPCS_SaveList;
}
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 926d702b4092a5..5d465f51ed1d9e 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -41,80 +41,6 @@ namespace ARMRI {
} // end namespace ARMRI
-/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
-/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- case LR: case SP: case PC:
- return true;
- case R8: case R9: case R10: case R11: case R12:
- // For iOS we want r7 and lr to be next to each other.
- return !SplitFramePushPop;
- default:
- return false;
- }
-}
-
-static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R8: case R9: case R10: case R11: case R12:
- // iOS has this second area.
- return SplitFramePushPop;
- default:
- return false;
- }
-}
-
-static inline bool isSplitFPArea1Register(unsigned Reg,
- bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R0: case R1: case R2: case R3:
- case R4: case R5: case R6: case R7:
- case R8: case R9: case R10: case R12:
- case SP: case PC:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool isSplitFPArea2Register(unsigned Reg,
- bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case R11: case LR:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
- using namespace ARM;
-
- switch (Reg) {
- case D15: case D14: case D13: case D12:
- case D11: case D10: case D9: case D8:
- case D7: case D6: case D5: case D4:
- case D3: case D2: case D1: case D0:
- case D31: case D30: case D29: case D28:
- case D27: case D26: case D25: case D24:
- case D23: case D22: case D21: case D20:
- case D19: case D18: case D17: case D16:
- return true;
- default:
- return false;
- }
-}
static inline bool isCalleeSavedRegister(unsigned Reg,
const MCPhysReg *CSRegs) {
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index d14424c2decac3..27f175a7003366 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;
-// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
-// pointer even for Thumb targets, where split pushes are necessary.
-// This AAPCS alternative makes sure the frame index slots match the push
-// order in that case.
-def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
- R7, R6, R5, R4,
- R10, R9, R8,
- (sequence "D%u", 15, 8))>;
+// Sometimes we need to split the push of the callee-saved GPRs into two
+// regions, to ensure that the frame chain record is set up correctly. These
+// list the callee-saved registers in the order they end up on the stack, which
+// depends on whether the frame pointer is r7 or r11.
+def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
+ LR, R11,
+ (sequence "D%u", 15, 8))>;
+def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
+ R7, R6, R5, R4,
+ R10, R9, R8,
+ (sequence "D%u", 15, 8))>;
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 40354f99559896..e02bb170fd3f54 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -173,6 +173,101 @@ static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
+enum class SpillArea {
+ GPRCS1,
+ GPRCS2,
+ DPRCS1,
+ DPRCS2,
+ FPCXT,
+};
+
+/// Get the spill area that Reg should be saved into in the prologue.
+SpillArea getSpillArea(Register Reg,
+ ARMSubtarget::PushPopSplitVariation Variation,
+ unsigned NumAlignedDPRCS2Regs,
+ const ARMBaseRegisterInfo *RegInfo) {
+ // NoSplit:
+ // push {r0-r12, lr} GPRCS1
+ // vpush {r8-d15} DPRCS1
+ //
+ // SplitR7:
+ // push {r0-r7, lr} GPRCS1
+ // push {r8-r12} GPRCS2
+ // vpush {r8-d15} DPRCS1
+ //
+ // SplitR11WindowsSEH:
+ // push {r0-r10, r12} GPRCS1
+ // vpush {r8-d15} DPRCS1
+ // push {r11, lr} GPRCS2
+ //
+ // SplitR11AAPCSSignRA:
+ // push {r0-r10, r12} GPRSC1
+ // push {r11, lr} GPRCS2
+ // vpush {r8-d15} DPRCS1
+
+ // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
+ // the top of the stack frame.
+ // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
+ // of SP. If used, it will be below the other save areas, after the stack has
+ // been re-aligned.
+
+ switch (Reg) {
+ default:
+ dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
+ llvm_unreachable("Don't know where to spill this register");
+ break;
+
+ case ARM::FPCXTNS:
+ return SpillArea::FPCXT;
+
+ case ARM::R0: case ARM::R1: case ARM::R2: case ARM::R3:
+ case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+ return SpillArea::GPRCS1;
+
+ case ARM::R8: case ARM::R9: case ARM::R10:
+ if (Variation == ARMSubtarget::SplitR7)
+ return SpillArea::GPRCS2;
+ else
+ return SpillArea::GPRCS1;
+
+ case ARM::R11:
+ if (Variation == ARMSubtarget::NoSplit)
+ return SpillArea::GPRCS1;
+ else
+ return SpillArea::GPRCS2;
+
+ case ARM::R12:
+ if (Variation == ARMSubtarget::SplitR7)
+ return SpillArea::GPRCS2;
+ else
+ return SpillArea::GPRCS1;
+
+ case ARM::LR:
+ if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
+ Variation == ARMSubtarget::SplitR11AAPCSSignRA)
+ return SpillArea::GPRCS2;
+ else
+ return SpillArea::GPRCS1;
+
+ case ARM::D0: case ARM::D1: case ARM::D2: case ARM::D3:
+ case ARM::D4: case ARM::D5: case ARM::D6: case ARM::D7:
+ return SpillArea::DPRCS1;
+
+ case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+ case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ return SpillArea::DPRCS2;
+ else
+ return SpillArea::DPRCS1;
+
+ case ARM::D16: case ARM::D17: case ARM::D18: case ARM::D19:
+ case ARM::D20: case ARM::D21: case ARM::D22: case ARM::D23:
+ case ARM::D24: case ARM::D25: case ARM::D26: case ARM::D27:
+ case ARM::D28: case ARM::D29: case ARM::D30: case ARM::D31:
+ return SpillArea::DPRCS1;
+ }
+}
+
ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
STI(sti) {}
@@ -600,6 +695,14 @@ struct StackAdjustingInsts {
MachineBasicBlock::iterator I;
unsigned SPAdjust;
bool BeforeFPSet;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
+ << "sp-adjust=" << SPAdjust;
+ I->dump();
+ }
+#endif
};
SmallVector<InstInfo, 4> Insts;
@@ -634,6 +737,14 @@ struct StackAdjustingInsts {
.setMIFlags(MachineInstr::FrameSetup);
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() {
+ dbgs() << "StackAdjustingInsts:\n";
+ for (auto &Info : Insts)
+ Info.dump();
+ }
+#endif
};
} // end anonymous namespace
@@ -713,6 +824,8 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
const MachineFunction &MF) {
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
@@ -720,9 +833,11 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
int MaxRegBytes = 8 * 4;
- if (STI.splitFramePointerPush(MF)) {
- // Here, r11 can be stored below all of r4-r15 (3 registers more than
- // above), plus d8-d15.
+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
+ // Here, r11 can be stored below all of r4-r15.
+ MaxRegBytes = 11 * 4;
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
+ // Here, r11 can be stored below all of r4-r15 plus d8-d15.
MaxRegBytes = 11 * 4 + 8 * 8;
}
int FPCXTSaveSize =
@@ -749,6 +864,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int FPCXTSaveSize = 0;
bool NeedsWinCFI = needsWinCFI(MF);
+ ARMSubtarget::PushPopSplitVariation PushPopSplit =
+ STI.getPushPopSplitVariation(MF);
+
+ LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -788,81 +907,38 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- // Determine spill area sizes.
- if (STI.splitFramePointerPush(MF)) {
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R11:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- GPRCS2Size += 4;
- break;
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R12:
- GPRCS1Size += 4;
- break;
- case ARM::FPCXTNS:
- FPCXTSaveSize = 4;
- break;
- default:
- // This is a DPR. Exclude the aligned DPRCS2 spills.
- if (Reg == ARM::D8)
- D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
- DPRCSSize += 8;
- }
+ // Determine spill area sizes, and some important frame indices.
+ SpillArea FramePtrSpillArea;
+ bool BeforeFPPush = true;
+ for (const CalleeSavedInfo &I : CSI) {
+ Register Reg = I.getReg();
+ int FI = I.getFrameIdx();
+
+ SpillArea Area = getSpillArea(Reg, PushPopSplit,
+ AFI->getNumAlignedDPRCS2Regs(), RegInfo);
+
+ if (Reg == FramePtr) {
+ FramePtrSpillFI = FI;
+ FramePtrSpillArea = Area;
}
- } else {
- for (const CalleeSavedInfo &I : CSI) {
- Register Reg = I.getReg();
- int FI = I.getFrameIdx();
- switch (Reg) {
- case ARM::R8:
- case ARM::R9:
- case ARM::R10:
- case ARM::R11:
- case ARM::R12:
- if (STI.splitFramePushPop(MF)) {
- GPRCS2Size += 4;
- break;
- }
- [[fallthrough]];
- case ARM::R0:
- case ARM::R1:
- case ARM::R2:
- case ARM::R3:
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- if (Reg == FramePtr)
- FramePtrSpillFI = FI;
- GPRCS1Size += 4;
- break;
- case ARM::FPCXTNS:
- FPCXTSaveSize = 4;
- break;
- default:
- // This is a DPR. Exclude the aligned DPRCS2 spills.
- if (Reg == ARM::D8)
- D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
- DPRCSSize += 8;
- }
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+
+ switch (Area) {
+ case SpillArea::FPCXT:
+ FPCXTSaveSize += 4;
+ break;
+ case SpillArea::GPRCS1:
+ GPRCS1Size += 4;
+ break;
+ case SpillArea::GPRCS2:
+ GPRCS2Size += 4;
+ break;
+ case SpillArea::DPRCS1:
+ DPRCSSize += 8;
+ break;
+ case SpillArea::DPRCS2:
+ break;
}
}
@@ -875,7 +951,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move past FPCXT area.
if (FPCXTSaveSize > 0) {
LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
+ DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
}
// Allocate the vararg register save area.
@@ -883,39 +959,45 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
LastPush = std::prev(MBBI);
- DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
+ DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
}
// Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
+ if (FramePtrSpillArea == SpillArea::GPRCS1)
+ BeforeFPPush = false;
}
- // Determine starting offsets of spill areas.
+ // Determine starting offsets of spill areas. These offsets are all positive
+ // offsets from the bottom of the lowest-addressed callee-save area
+ // (excluding DPRCS2, which is th the re-aligned stack region) to the bottom
+ // of the spill area in question.
unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
- if (!STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
DPRGapSize += GPRCS2Size;
}
DPRGapSize %= DPRAlign.value();
unsigned DPRCSOffset;
- if (STI.splitFramePointerPush(MF)) {
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
GPRCS2Offset = DPRCSOffset - GPRCS2Size;
} else {
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
}
- int FramePtrOffsetInPush = 0;
if (HasFP) {
+ // Offset from the CFA to the saved frame pointer, will be negative.
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
+ LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
+ << ", FPOffset: " << FPOffset << "\n");
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -923,10 +1005,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
- // Move past area 2.
- if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
+ // Move GPRCS2, unless using SplitR11WindowsSEH, in which case it will be
+ // after DPRCS1.
+ if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
- DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
+ DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
+ if (FramePtrSpillArea == SpillArea::GPRCS2)
+ BeforeFPPush = false;
}
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -939,16 +1024,16 @@ void ARMFrameLowering::e...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/110286
More information about the llvm-commits
mailing list