[llvm] c379127 - [AArch64] Refactor and move common code to `AArch64PrologueEpilogue` (NFCI) (#158920)
Author: Benjamin Maxwell
Date: 2025-09-18T13:04:38+01:00
New Revision: c379127c123e8346f764630dc659e0871099f2fb
URL: https://github.com/llvm/llvm-project/commit/c379127c123e8346f764630dc659e0871099f2fb
DIFF: https://github.com/llvm/llvm-project/commit/c379127c123e8346f764630dc659e0871099f2fb.diff
LOG: [AArch64] Refactor and move common code to `AArch64PrologueEpilogue` (NFCI) (#158920)
This is the final patch in a series reworking the structure of the
prologue/epilogue code. This patch moves some methods from
`AArch64FrameLowering` to `AArch64PrologueEpilogue`, as they are only used
by `emitPrologue`/`emitEpilogue`. This includes:
- `shouldCombineCSRLocalStackBump()`
- `shouldCombineCSRLocalStackBumpInEpilogue()`
- `allocateStackSpace()`
- `convertCalleeSaveRestoreToSPPrePostIncDec()`
- `fixupCalleeSaveRestoreStackOffset()`
Common code/methods have been factored into an
`AArch64PrologueEpilogueCommon` base class used by both
`AArch64PrologueEmitter` and `AArch64EpilogueEmitter`.
Finally, some redundant fetching of target classes has been removed from
several methods.
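To illustrate the shape of the refactor, here is a rough, self-contained
sketch of the pattern in plain C++ (all names below are toy stand-ins, not
the real LLVM classes or signatures; see the AArch64PrologueEpilogue.h hunk
below for the actual definitions):

#include <cstdint>
#include <iostream>

// Toy stand-in for the state both emitters share; the real code caches
// MachineFunction, MachineBasicBlock, the subtarget, TII, AFI, etc.
struct FuncState {
  uint64_t LocalStackSize = 32;
  bool HasVarSizedObjects = false;
};

// Analogous to AArch64PrologueEpilogueCommon: fetch shared state once in
// the constructor and host the helpers used by both prologue and epilogue.
class PrologueEpilogueCommon {
public:
  explicit PrologueEpilogueCommon(FuncState &F) : F(F) {}

protected:
  // Analogous to shouldCombineCSRLocalStackBump(); logic abbreviated.
  bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const {
    if (F.LocalStackSize == 0)
      return false;
    if (StackBumpBytes >= 512) // max stp/ldp immediate for CSR save/restore
      return false;
    return !F.HasVarSizedObjects;
  }

  FuncState &F;
};

class PrologueEmitter final : public PrologueEpilogueCommon {
public:
  using PrologueEpilogueCommon::PrologueEpilogueCommon;
  void emitPrologue() {
    std::cout << "combine SP bump: "
              << shouldCombineCSRLocalStackBump(F.LocalStackSize) << '\n';
  }
};

class EpilogueEmitter final : public PrologueEpilogueCommon {
public:
  using PrologueEpilogueCommon::PrologueEpilogueCommon;
  // The real epilogue emitter shadows the base helper to also reject
  // combining when the block ends in MTE tag stores.
  void emitEpilogue() {
    bool Combine = shouldCombineCSRLocalStackBump(F.LocalStackSize);
    (void)Combine; // restore CSRs, deallocate stack, ...
  }
};

int main() {
  FuncState F;
  PrologueEmitter(F).emitPrologue(); // prints "combine SP bump: 1"
  EpilogueEmitter(F).emitEpilogue();
}

The real base class additionally caches the subtarget, instruction and
register info, and common flags such as NeedsWinCFI, so each emitter fetches
them once in its constructor rather than in every method.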
Added:
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd53f04443766..ab5c6f3c0a19d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -706,148 +706,6 @@ void AArch64FrameLowering::resetCFIToInitialState(
}
}
-// Return the maximum possible number of bytes for `Size` due to the
-// architectural limit on the size of a SVE register.
-static int64_t upperBound(StackOffset Size) {
- static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
- return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
-}
-
-void AArch64FrameLowering::allocateStackSpace(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
- bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
- bool FollowupAllocs) const {
-
- if (!AllocSize)
- return;
-
- DebugLoc DL;
- MachineFunction &MF = *MBB.getParent();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
-
- const int64_t MaxAlign = MFI.getMaxAlign().value();
- const uint64_t AndMask = ~(MaxAlign - 1);
-
- if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
- Register TargetReg = RealignmentPadding
- ? findScratchNonCalleeSaveRegister(&MBB)
- : AArch64::SP;
- // SUB Xd/SP, SP, AllocSize
- emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
- EmitCFI, InitialOffset);
-
- if (RealignmentPadding) {
- // AND SP, X9, 0b11111...0000
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
- .addReg(TargetReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
- .setMIFlags(MachineInstr::FrameSetup);
- AFI.setStackRealigned(true);
-
- // No need for SEH instructions here; if we're realigning the stack,
- // we've set a frame pointer and already finished the SEH prologue.
- assert(!NeedsWinCFI);
- }
- return;
- }
-
- //
- // Stack probing allocation.
- //
-
- // Fixed length allocation. If we don't need to re-align the stack and don't
- // have SVE objects, we can use a more efficient sequence for stack probing.
- if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
- Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
- assert(ScratchReg != AArch64::NoRegister);
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
- .addDef(ScratchReg)
- .addImm(AllocSize.getFixed())
- .addImm(InitialOffset.getFixed())
- .addImm(InitialOffset.getScalable());
- // The fixed allocation may leave unprobed bytes at the top of the
- // stack. If we have subsequent allocation (e.g. if we have variable-sized
- // objects), we need to issue an extra probe, so these allocations start in
- // a known state.
- if (FollowupAllocs) {
- // STR XZR, [SP]
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
- .addReg(AArch64::XZR)
- .addReg(AArch64::SP)
- .addImm(0)
- .setMIFlags(MachineInstr::FrameSetup);
- }
-
- return;
- }
-
- // Variable length allocation.
-
- // If the (unknown) allocation size cannot exceed the probe size, decrement
- // the stack pointer right away.
- int64_t ProbeSize = AFI.getStackProbeSize();
- if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
- Register ScratchReg = RealignmentPadding
- ? findScratchNonCalleeSaveRegister(&MBB)
- : AArch64::SP;
- assert(ScratchReg != AArch64::NoRegister);
- // SUB Xd, SP, AllocSize
- emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
- EmitCFI, InitialOffset);
- if (RealignmentPadding) {
- // AND SP, Xn, 0b11111...0000
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
- .setMIFlags(MachineInstr::FrameSetup);
- AFI.setStackRealigned(true);
- }
- if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
- AArch64::StackProbeMaxUnprobedStack) {
- // STR XZR, [SP]
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
- .addReg(AArch64::XZR)
- .addReg(AArch64::SP)
- .addImm(0)
- .setMIFlags(MachineInstr::FrameSetup);
- }
- return;
- }
-
- // Emit a variable-length allocation probing loop.
- // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
- // each of them guaranteed to adjust the stack by less than the probe size.
- Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
- assert(TargetReg != AArch64::NoRegister);
- // SUB Xd, SP, AllocSize
- emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
- EmitCFI, InitialOffset);
- if (RealignmentPadding) {
- // AND Xn, Xn, 0b11111...0000
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
- .addReg(TargetReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
- .setMIFlags(MachineInstr::FrameSetup);
- }
-
- BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
- .addReg(TargetReg);
- if (EmitCFI) {
- // Set the CFA register back to SP.
- CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
- .buildDefCFARegister(AArch64::SP);
- }
- if (RealignmentPadding)
- AFI.setStackRealigned(true);
-}
-
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
@@ -1088,92 +946,12 @@ bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
return SignReturnAddressAll;
}
-bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
- MachineFunction &MF, uint64_t StackBumpBytes) const {
- AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- if (homogeneousPrologEpilog(MF))
- return false;
-
- if (AFI->getLocalStackSize() == 0)
- return false;
-
- // For WinCFI, if optimizing for size, prefer to not combine the stack bump
- // (to force a stp with predecrement) to match the packed unwind format,
- // provided that there actually are any callee saved registers to merge the
- // decrement with.
- // This is potentially marginally slower, but allows using the packed
- // unwind format for functions that both have a local area and callee saved
- // registers. Using the packed unwind format notably reduces the size of
- // the unwind info.
- if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
- MF.getFunction().hasOptSize())
- return false;
-
- // 512 is the maximum immediate for stp/ldp that will be used for
- // callee-save save/restores
- if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
- return false;
-
- if (MFI.hasVarSizedObjects())
- return false;
-
- if (RegInfo->hasStackRealignment(MF))
- return false;
-
- // This isn't strictly necessary, but it simplifies things a bit since the
- // current RedZone handling code assumes the SP is adjusted by the
- // callee-save save/restore code.
- if (canUseRedZone(MF))
- return false;
-
- // When there is an SVE area on the stack, always allocate the
- // callee-saves and spills/locals separately.
- if (getSVEStackSize(MF))
- return false;
-
- return true;
-}
-
-bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
- MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
- if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
- return false;
- if (MBB.empty())
- return true;
-
- // Disable combined SP bump if the last instruction is an MTE tag store. It
- // is almost always better to merge SP adjustment into those instructions.
- MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
- MachineBasicBlock::iterator Begin = MBB.begin();
- while (LastI != Begin) {
- --LastI;
- if (LastI->isTransient())
- continue;
- if (!LastI->getFlag(MachineInstr::FrameDestroy))
- break;
- }
- switch (LastI->getOpcode()) {
- case AArch64::STGloop:
- case AArch64::STZGloop:
- case AArch64::STGi:
- case AArch64::STZGi:
- case AArch64::ST2Gi:
- case AArch64::STZ2Gi:
- return false;
- default:
- return true;
- }
- llvm_unreachable("unreachable");
-}
-
// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
-static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo &TII,
- MachineInstr::MIFlag Flag) {
+MachineBasicBlock::iterator
+AArch64FrameLowering::insertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ MachineInstr::MIFlag Flag) const {
unsigned Opc = MBBI->getOpcode();
MachineBasicBlock *MBB = MBBI->getParent();
MachineFunction &MF = *MBB->getParent();
@@ -1332,34 +1110,6 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
return I;
}
-// Fix up the SEH opcode associated with the save/restore instruction.
-static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
- unsigned LocalStackSize) {
- MachineOperand *ImmOpnd = nullptr;
- unsigned ImmIdx = MBBI->getNumOperands() - 1;
- switch (MBBI->getOpcode()) {
- default:
- llvm_unreachable("Fix the offset in the SEH instruction");
- case AArch64::SEH_SaveFPLR:
- case AArch64::SEH_SaveRegP:
- case AArch64::SEH_SaveReg:
- case AArch64::SEH_SaveFRegP:
- case AArch64::SEH_SaveFReg:
- case AArch64::SEH_SaveAnyRegQP:
- case AArch64::SEH_SaveAnyRegQPX:
- ImmOpnd = &MBBI->getOperand(ImmIdx);
- break;
- }
- if (ImmOpnd)
- ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
-}
-
-bool AArch64FrameLowering::requiresGetVGCall(const MachineFunction &MF) const {
- auto *AFI = MF.getInfo<AArch64FunctionInfo>();
- return AFI->hasStreamingModeChanges() &&
- !MF.getSubtarget<AArch64Subtarget>().hasSVE();
-}
-
bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges())
@@ -1372,201 +1122,6 @@ bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const {
return true;
}
-static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
- RTLIB::Libcall LC) {
- return MO.isSymbol() &&
- StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
-}
-
-bool AArch64FrameLowering::isVGInstruction(MachineBasicBlock::iterator MBBI,
- const TargetLowering &TLI) const {
- unsigned Opc = MBBI->getOpcode();
- if (Opc == AArch64::CNTD_XPiI)
- return true;
-
- if (!requiresGetVGCall(*MBBI->getMF()))
- return false;
-
- if (Opc == AArch64::BL)
- return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
-
- return Opc == TargetOpcode::COPY;
-}
-
-MachineBasicBlock::iterator
-AArch64FrameLowering::convertCalleeSaveRestoreToSPPrePostIncDec(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
- bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
- MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
- unsigned NewOpc;
-
- // If the function contains streaming mode changes, we expect instructions
- // to calculate the value of VG before spilling. Move past these instructions
- // if necessary.
- MachineFunction &MF = *MBB.getParent();
- if (requiresSaveVG(MF)) {
- auto &TLI = *MF.getSubtarget().getTargetLowering();
- while (isVGInstruction(MBBI, TLI))
- ++MBBI;
- }
-
- switch (MBBI->getOpcode()) {
- default:
- llvm_unreachable("Unexpected callee-save save/restore opcode!");
- case AArch64::STPXi:
- NewOpc = AArch64::STPXpre;
- break;
- case AArch64::STPDi:
- NewOpc = AArch64::STPDpre;
- break;
- case AArch64::STPQi:
- NewOpc = AArch64::STPQpre;
- break;
- case AArch64::STRXui:
- NewOpc = AArch64::STRXpre;
- break;
- case AArch64::STRDui:
- NewOpc = AArch64::STRDpre;
- break;
- case AArch64::STRQui:
- NewOpc = AArch64::STRQpre;
- break;
- case AArch64::LDPXi:
- NewOpc = AArch64::LDPXpost;
- break;
- case AArch64::LDPDi:
- NewOpc = AArch64::LDPDpost;
- break;
- case AArch64::LDPQi:
- NewOpc = AArch64::LDPQpost;
- break;
- case AArch64::LDRXui:
- NewOpc = AArch64::LDRXpost;
- break;
- case AArch64::LDRDui:
- NewOpc = AArch64::LDRDpost;
- break;
- case AArch64::LDRQui:
- NewOpc = AArch64::LDRQpost;
- break;
- }
- TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
- int64_t MinOffset, MaxOffset;
- bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
- NewOpc, Scale, Width, MinOffset, MaxOffset);
- (void)Success;
- assert(Success && "unknown load/store opcode");
-
- // If the first store isn't right where we want SP then we can't fold the
- // update in so create a normal arithmetic instruction instead.
- if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
- CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
- CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
- // If we are destroying the frame, make sure we add the increment after the
- // last frame operation.
- if (FrameFlag == MachineInstr::FrameDestroy) {
- ++MBBI;
- // Also skip the SEH instruction, if needed
- if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
- ++MBBI;
- }
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
- false, NeedsWinCFI, HasWinCFI, EmitCFI,
- StackOffset::getFixed(CFAOffset));
-
- return std::prev(MBBI);
- }
-
- // Get rid of the SEH code associated with the old instruction.
- if (NeedsWinCFI) {
- auto SEH = std::next(MBBI);
- if (AArch64InstrInfo::isSEHInstruction(*SEH))
- SEH->eraseFromParent();
- }
-
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
- MIB.addReg(AArch64::SP, RegState::Define);
-
- // Copy all operands other than the immediate offset.
- unsigned OpndIdx = 0;
- for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
- ++OpndIdx)
- MIB.add(MBBI->getOperand(OpndIdx));
-
- assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
- "Unexpected immediate offset in first/last callee-save save/restore "
- "instruction!");
- assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
- "Unexpected base register in callee-save save/restore instruction!");
- assert(CSStackSizeInc % Scale == 0);
- MIB.addImm(CSStackSizeInc / (int)Scale);
-
- MIB.setMIFlags(MBBI->getFlags());
- MIB.setMemRefs(MBBI->memoperands());
-
- // Generate a new SEH code that corresponds to the new instruction.
- if (NeedsWinCFI) {
- *HasWinCFI = true;
- InsertSEH(*MIB, *TII, FrameFlag);
- }
-
- if (EmitCFI)
- CFIInstBuilder(MBB, MBBI, FrameFlag)
- .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
-
- return std::prev(MBB.erase(MBBI));
-}
-
-void AArch64FrameLowering::fixupCalleeSaveRestoreStackOffset(
- MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI,
- bool *HasWinCFI) const {
- if (AArch64InstrInfo::isSEHInstruction(MI))
- return;
-
- unsigned Opc = MI.getOpcode();
- unsigned Scale;
- switch (Opc) {
- case AArch64::STPXi:
- case AArch64::STRXui:
- case AArch64::STPDi:
- case AArch64::STRDui:
- case AArch64::LDPXi:
- case AArch64::LDRXui:
- case AArch64::LDPDi:
- case AArch64::LDRDui:
- Scale = 8;
- break;
- case AArch64::STPQi:
- case AArch64::STRQui:
- case AArch64::LDPQi:
- case AArch64::LDRQui:
- Scale = 16;
- break;
- default:
- llvm_unreachable("Unexpected callee-save save/restore opcode!");
- }
-
- unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
- assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
- "Unexpected base register in callee-save save/restore instruction!");
- // Last operand is immediate offset that needs fixing.
- MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
- // All generated opcodes have scaled offsets.
- assert(LocalStackSize % Scale == 0);
- OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
-
- if (NeedsWinCFI) {
- *HasWinCFI = true;
- auto MBBI = std::next(MachineBasicBlock::iterator(MI));
- assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
- assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
- "Expecting a SEH instruction");
- fixupSEHOpcode(MBBI, LocalStackSize);
- }
-}
-
static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
@@ -1575,30 +1130,6 @@ static unsigned getStackHazardSize(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
}
-// Convenience function to determine whether I is an SVE callee save.
-bool AArch64FrameLowering::isSVECalleeSave(
- MachineBasicBlock::iterator I) const {
- switch (I->getOpcode()) {
- default:
- return false;
- case AArch64::PTRUE_C_B:
- case AArch64::LD1B_2Z_IMM:
- case AArch64::ST1B_2Z_IMM:
- case AArch64::STR_ZXI:
- case AArch64::STR_PXI:
- case AArch64::LDR_ZXI:
- case AArch64::LDR_PXI:
- case AArch64::PTRUE_B:
- case AArch64::CPY_ZPzI_B:
- case AArch64::CMPNE_PPzZI_B:
- return I->getFlag(MachineInstr::FrameSetup) ||
- I->getFlag(MachineInstr::FrameDestroy);
- case AArch64::SEH_SavePReg:
- case AArch64::SEH_SaveZReg:
- return true;
- }
-}
-
void AArch64FrameLowering::emitPacRetPlusLeafHardening(
MachineFunction &MF) const {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
@@ -2464,7 +1995,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
- InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+ insertSEH(MIB, TII, MachineInstr::FrameSetup);
} else { // The code when the pair of ZReg is not present
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (!MRI.isReserved(Reg1))
@@ -2486,7 +2017,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOStore, Size, Alignment));
if (NeedsWinCFI)
- InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+ insertSEH(MIB, TII, MachineInstr::FrameSetup);
}
// Update the StackIDs of the SVE stack slots.
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2615,7 +2146,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
- InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+ insertSEH(MIB, TII, MachineInstr::FrameDestroy);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
if (RPI.isPaired()) {
@@ -2633,7 +2164,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
MachineMemOperand::MOLoad, Size, Alignment));
if (NeedsWinCFI)
- InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+ insertSEH(MIB, TII, MachineInstr::FrameDestroy);
}
}
return true;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 0825d03bcb0d8..7bba053111e89 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -147,6 +147,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
StackOffset getSVEStackSize(const MachineFunction &MF) const;
+ friend class AArch64PrologueEpilogueCommon;
friend class AArch64PrologueEmitter;
friend class AArch64EpilogueEmitter;
@@ -164,20 +165,10 @@ class AArch64FrameLowering : public TargetFrameLowering {
/// Returns true if CSRs should be paired.
bool producePairRegisters(MachineFunction &MF) const;
- bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
- uint64_t StackBumpBytes) const;
-
int64_t estimateSVEStackObjectOffsets(MachineFrameInfo &MF) const;
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
- bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
- uint64_t StackBumpBytes) const;
- void allocateStackSpace(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- int64_t RealignmentPadding, StackOffset AllocSize,
- bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
- StackOffset InitialOffset, bool FollowupAllocs) const;
/// Make a determination whether a Hazard slot is used and create it if
/// needed.
void determineStackHazardSlot(MachineFunction &MF,
@@ -214,6 +205,12 @@ class AArch64FrameLowering : public TargetFrameLowering {
StackOffset getStackOffset(const MachineFunction &MF,
int64_t ObjectOffset) const;
+ // Given a load or a store instruction, generate an appropriate unwinding SEH
+ // code on Windows.
+ MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo &TII,
+ MachineInstr::MIFlag Flag) const;
+
/// Returns how much of the incoming argument stack area (in bytes) we should
/// clean up in an epilogue. For the C calling convention this will be 0, for
/// guaranteed tail call conventions it can be positive (a normal return or a
@@ -237,35 +234,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
bool HasCall = false) const;
- // Convert callee-save register save/restore instruction to do stack pointer
- // decrement/increment to allocate/deallocate the callee-save stack area by
- // converting store/load to use pre/post increment version.
- MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
- bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
- MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
- int CFAOffset = 0) const;
-
- // Fixup callee-save register save/restore instructions to take into account
- // combined SP bump by adding the local stack size to the stack offsets.
- void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
- uint64_t LocalStackSize,
- bool NeedsWinCFI,
- bool *HasWinCFI) const;
-
- bool isSVECalleeSave(MachineBasicBlock::iterator I) const;
-
/// Returns the size of the fixed object area (allocated next to sp on entry)
/// On Win64 this may include a var args area and an UnwindHelp object for EH.
unsigned getFixedObjectSize(const MachineFunction &MF,
const AArch64FunctionInfo *AFI, bool IsWin64,
bool IsFunclet) const;
-
- bool isVGInstruction(MachineBasicBlock::iterator MBBI,
- const TargetLowering &TLI) const;
-
- bool requiresGetVGCall(const MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 700c45a8aec9a..7947469b6c04f 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -22,19 +22,312 @@ STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
namespace llvm {
-AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
- MachineBasicBlock &MBB,
- const AArch64FrameLowering &AFL)
- : MF(MF), MBB(MBB), F(MF.getFunction()), MFI(MF.getFrameInfo()),
+static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
+ RTLIB::Libcall LC) {
+ return MO.isSymbol() &&
+ StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
+}
+
+bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
+ return AFI->hasStreamingModeChanges() &&
+ !MF.getSubtarget<AArch64Subtarget>().hasSVE();
+}
+
+bool AArch64PrologueEpilogueCommon::isVGInstruction(
+ MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
+ unsigned Opc = MBBI->getOpcode();
+ if (Opc == AArch64::CNTD_XPiI)
+ return true;
+
+ if (!requiresGetVGCall())
+ return false;
+
+ if (Opc == AArch64::BL)
+ return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
+
+ return Opc == TargetOpcode::COPY;
+}
+
+// Convenience function to determine whether I is an SVE callee save.
+static bool isSVECalleeSave(MachineBasicBlock::iterator I) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case AArch64::PTRUE_C_B:
+ case AArch64::LD1B_2Z_IMM:
+ case AArch64::ST1B_2Z_IMM:
+ case AArch64::STR_ZXI:
+ case AArch64::STR_PXI:
+ case AArch64::LDR_ZXI:
+ case AArch64::LDR_PXI:
+ case AArch64::PTRUE_B:
+ case AArch64::CPY_ZPzI_B:
+ case AArch64::CMPNE_PPzZI_B:
+ return I->getFlag(MachineInstr::FrameSetup) ||
+ I->getFlag(MachineInstr::FrameDestroy);
+ case AArch64::SEH_SavePReg:
+ case AArch64::SEH_SaveZReg:
+ return true;
+ }
+}
+
+AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL)
+ : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
RegInfo(*Subtarget.getRegisterInfo()) {
TII = Subtarget.getInstrInfo();
AFI = MF.getInfo<AArch64FunctionInfo>();
- EmitCFI = AFI->needsDwarfUnwindInfo(MF);
- EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
HasFP = AFL.hasFP(MF);
NeedsWinCFI = AFL.needsWinCFI(MF);
+}
+
+MachineBasicBlock::iterator
+AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
+ bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
+ unsigned NewOpc;
+
+ // If the function contains streaming mode changes, we expect instructions
+ // to calculate the value of VG before spilling. Move past these instructions
+ // if necessary.
+ if (AFL.requiresSaveVG(MF)) {
+ auto &TLI = *Subtarget.getTargetLowering();
+ while (isVGInstruction(MBBI, TLI))
+ ++MBBI;
+ }
+
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ case AArch64::STPXi:
+ NewOpc = AArch64::STPXpre;
+ break;
+ case AArch64::STPDi:
+ NewOpc = AArch64::STPDpre;
+ break;
+ case AArch64::STPQi:
+ NewOpc = AArch64::STPQpre;
+ break;
+ case AArch64::STRXui:
+ NewOpc = AArch64::STRXpre;
+ break;
+ case AArch64::STRDui:
+ NewOpc = AArch64::STRDpre;
+ break;
+ case AArch64::STRQui:
+ NewOpc = AArch64::STRQpre;
+ break;
+ case AArch64::LDPXi:
+ NewOpc = AArch64::LDPXpost;
+ break;
+ case AArch64::LDPDi:
+ NewOpc = AArch64::LDPDpost;
+ break;
+ case AArch64::LDPQi:
+ NewOpc = AArch64::LDPQpost;
+ break;
+ case AArch64::LDRXui:
+ NewOpc = AArch64::LDRXpost;
+ break;
+ case AArch64::LDRDui:
+ NewOpc = AArch64::LDRDpost;
+ break;
+ case AArch64::LDRQui:
+ NewOpc = AArch64::LDRQpost;
+ break;
+ }
+ TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
+ int64_t MinOffset, MaxOffset;
+ bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
+ NewOpc, Scale, Width, MinOffset, MaxOffset);
+ (void)Success;
+ assert(Success && "unknown load/store opcode");
+
+ // If the first store isn't right where we want SP then we can't fold the
+ // update in so create a normal arithmetic instruction instead.
+ if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
+ CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
+ CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
+ // If we are destroying the frame, make sure we add the increment after the
+ // last frame operation.
+ if (FrameFlag == MachineInstr::FrameDestroy) {
+ ++MBBI;
+ // Also skip the SEH instruction, if needed
+ if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
+ ++MBBI;
+ }
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI,
+ StackOffset::getFixed(CFAOffset));
+
+ return std::prev(MBBI);
+ }
+
+ // Get rid of the SEH code associated with the old instruction.
+ if (NeedsWinCFI) {
+ auto SEH = std::next(MBBI);
+ if (AArch64InstrInfo::isSEHInstruction(*SEH))
+ SEH->eraseFromParent();
+ }
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ MIB.addReg(AArch64::SP, RegState::Define);
+
+ // Copy all operands other than the immediate offset.
+ unsigned OpndIdx = 0;
+ for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
+ ++OpndIdx)
+ MIB.add(MBBI->getOperand(OpndIdx));
+
+ assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
+ "Unexpected immediate offset in first/last callee-save save/restore "
+ "instruction!");
+ assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
+ "Unexpected base register in callee-save save/restore instruction!");
+ assert(CSStackSizeInc % Scale == 0);
+ MIB.addImm(CSStackSizeInc / (int)Scale);
+
+ MIB.setMIFlags(MBBI->getFlags());
+ MIB.setMemRefs(MBBI->memoperands());
+
+ // Generate a new SEH code that corresponds to the new instruction.
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ AFL.insertSEH(*MIB, *TII, FrameFlag);
+ }
+
+ if (EmitCFI)
+ CFIInstBuilder(MBB, MBBI, FrameFlag)
+ .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
+
+ return std::prev(MBB.erase(MBBI));
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+ unsigned LocalStackSize) {
+ MachineOperand *ImmOpnd = nullptr;
+ unsigned ImmIdx = MBBI->getNumOperands() - 1;
+ switch (MBBI->getOpcode()) {
+ default:
+ llvm_unreachable("Fix the offset in the SEH instruction");
+ case AArch64::SEH_SaveFPLR:
+ case AArch64::SEH_SaveRegP:
+ case AArch64::SEH_SaveReg:
+ case AArch64::SEH_SaveFRegP:
+ case AArch64::SEH_SaveFReg:
+ case AArch64::SEH_SaveAnyRegQP:
+ case AArch64::SEH_SaveAnyRegQPX:
+ ImmOpnd = &MBBI->getOperand(ImmIdx);
+ break;
+ }
+ if (ImmOpnd)
+ ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
+}
+
+void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
+ MachineInstr &MI, uint64_t LocalStackSize) const {
+ if (AArch64InstrInfo::isSEHInstruction(MI))
+ return;
+
+ unsigned Opc = MI.getOpcode();
+ unsigned Scale;
+ switch (Opc) {
+ case AArch64::STPXi:
+ case AArch64::STRXui:
+ case AArch64::STPDi:
+ case AArch64::STRDui:
+ case AArch64::LDPXi:
+ case AArch64::LDRXui:
+ case AArch64::LDPDi:
+ case AArch64::LDRDui:
+ Scale = 8;
+ break;
+ case AArch64::STPQi:
+ case AArch64::STRQui:
+ case AArch64::LDPQi:
+ case AArch64::LDRQui:
+ Scale = 16;
+ break;
+ default:
+ llvm_unreachable("Unexpected callee-save save/restore opcode!");
+ }
+
+ unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
+ assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
+ "Unexpected base register in callee-save save/restore instruction!");
+ // Last operand is immediate offset that needs fixing.
+ MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
+ // All generated opcodes have scaled offsets.
+ assert(LocalStackSize % Scale == 0);
+ OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
+
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+ assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+ assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
+ "Expecting a SEH instruction");
+ fixupSEHOpcode(MBBI, LocalStackSize);
+ }
+}
+
+bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
+ uint64_t StackBumpBytes) const {
+ if (AFL.homogeneousPrologEpilog(MF))
+ return false;
+
+ if (AFI->getLocalStackSize() == 0)
+ return false;
+
+ // For WinCFI, if optimizing for size, prefer to not combine the stack bump
+ // (to force a stp with predecrement) to match the packed unwind format,
+ // provided that there actually are any callee saved registers to merge the
+ // decrement with.
+ // This is potentially marginally slower, but allows using the packed
+ // unwind format for functions that both have a local area and callee saved
+ // registers. Using the packed unwind format notably reduces the size of
+ // the unwind info.
+ if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
+ MF.getFunction().hasOptSize())
+ return false;
+
+ // 512 is the maximum immediate for stp/ldp that will be used for
+ // callee-save save/restores
+ if (StackBumpBytes >= 512 ||
+ AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
+ return false;
+
+ if (MFI.hasVarSizedObjects())
+ return false;
+
+ if (RegInfo.hasStackRealignment(MF))
+ return false;
+
+ // This isn't strictly necessary, but it simplifies things a bit since the
+ // current RedZone handling code assumes the SP is adjusted by the
+ // callee-save save/restore code.
+ if (AFL.canUseRedZone(MF))
+ return false;
+
+ // When there is an SVE area on the stack, always allocate the
+ // callee-saves and spills/locals separately.
+ if (AFL.getSVEStackSize(MF))
+ return false;
+
+ return true;
+}
+
+AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL)
+ : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
+ EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+ EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
IsFunclet = MBB.isEHFuncletEntry();
HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
@@ -75,7 +368,7 @@ void AArch64PrologueEmitter::collectBlockLiveins() {
// X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
// This is necessary to spill VG if required where SVE is unavailable, but
// X0 is preserved around this call.
- if (AFL.requiresGetVGCall(MF))
+ if (requiresGetVGCall())
LiveRegs.removeReg(AArch64::X0);
}
}
@@ -97,7 +390,142 @@ void AArch64PrologueEmitter::verifyPrologueClobbers() const {
void AArch64PrologueEmitter::determineLocalsStackSize(
uint64_t StackSize, uint64_t PrologueSaveSize) {
AFI->setLocalStackSize(StackSize - PrologueSaveSize);
- CombineSPBump = AFL.shouldCombineCSRLocalStackBump(MF, StackSize);
+ CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
+}
+
+// Return the maximum possible number of bytes for `Size` due to the
+// architectural limit on the size of a SVE register.
+static int64_t upperBound(StackOffset Size) {
+ static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+ return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
+}
+
+void AArch64PrologueEmitter::allocateStackSpace(
+ MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
+ StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
+ bool FollowupAllocs) {
+
+ if (!AllocSize)
+ return;
+
+ DebugLoc DL;
+ const int64_t MaxAlign = MFI.getMaxAlign().value();
+ const uint64_t AndMask = ~(MaxAlign - 1);
+
+ if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
+ Register TargetReg = RealignmentPadding
+ ? AFL.findScratchNonCalleeSaveRegister(&MBB)
+ : AArch64::SP;
+ // SUB Xd/SP, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+ EmitCFI, InitialOffset);
+
+ if (RealignmentPadding) {
+ // AND SP, X9, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI->setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+ return;
+ }
+
+ //
+ // Stack probing allocation.
+ //
+
+ // Fixed length allocation. If we don't need to re-align the stack and don't
+ // have SVE objects, we can use a more efficient sequence for stack probing.
+ if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
+ Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
+ assert(ScratchReg != AArch64::NoRegister);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
+ .addDef(ScratchReg)
+ .addImm(AllocSize.getFixed())
+ .addImm(InitialOffset.getFixed())
+ .addImm(InitialOffset.getScalable());
+ // The fixed allocation may leave unprobed bytes at the top of the
+ // stack. If we have subsequent allocation (e.g. if we have variable-sized
+ // objects), we need to issue an extra probe, so these allocations start in
+ // a known state.
+ if (FollowupAllocs) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ return;
+ }
+
+ // Variable length allocation.
+
+ // If the (unknown) allocation size cannot exceed the probe size, decrement
+ // the stack pointer right away.
+ int64_t ProbeSize = AFI->getStackProbeSize();
+ if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
+ Register ScratchReg = RealignmentPadding
+ ? AFL.findScratchNonCalleeSaveRegister(&MBB)
+ : AArch64::SP;
+ assert(ScratchReg != AArch64::NoRegister);
+ // SUB Xd, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+ EmitCFI, InitialOffset);
+ if (RealignmentPadding) {
+ // AND SP, Xn, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ AFI->setStackRealigned(true);
+ }
+ if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
+ AArch64::StackProbeMaxUnprobedStack) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+
+ // Emit a variable-length allocation probing loop.
+ // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
+ // each of them guaranteed to adjust the stack by less than the probe size.
+ Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
+ assert(TargetReg != AArch64::NoRegister);
+ // SUB Xd, SP, AllocSize
+ emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+ EmitCFI, InitialOffset);
+ if (RealignmentPadding) {
+ // AND Xn, Xn, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
+ .addReg(TargetReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
+ .addReg(TargetReg);
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildDefCFARegister(AArch64::SP);
+ }
+ if (RealignmentPadding)
+ AFI->setStackRealigned(true);
}
void AArch64PrologueEmitter::emitPrologue() {
@@ -198,19 +626,16 @@ void AArch64PrologueEmitter::emitPrologue() {
auto SaveSize =
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
StackOffset::getFixed(FixedObject);
- AFL.allocateStackSpace(MBB, PrologueBeginI, 0, SaveSize, NeedsWinCFI,
- &HasWinCFI,
- /*EmitCFI=*/false, StackOffset{},
- /*FollowupAllocs=*/true);
+ allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
+ /*FollowupAllocs=*/true);
NumBytes -= FixedObject;
// Now allocate space for the GPR callee saves.
MachineBasicBlock::iterator MBBI = PrologueBeginI;
- while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
+ while (MBBI != EndI && isSVECalleeSave(MBBI))
++MBBI;
- FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
- &HasWinCFI, EmitAsyncCFI);
+ FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
+ MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
NumBytes -= AFI->getCalleeSavedStackSize();
} else if (CombineSPBump) {
assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
@@ -223,9 +648,8 @@ void AArch64PrologueEmitter::emitPrologue() {
// Stack has been already adjusted.
NumBytes -= PrologueSaveSize;
} else if (PrologueSaveSize != 0) {
- FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, PrologueBeginI, DL, TII, -PrologueSaveSize, NeedsWinCFI,
- &HasWinCFI, EmitAsyncCFI);
+ FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
+ PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -233,17 +657,17 @@ void AArch64PrologueEmitter::emitPrologue() {
// Move past the saves of the callee-saved registers, fixing up the offsets
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
- auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &TLI = *Subtarget.getTargetLowering();
MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
while (AfterGPRSavesI != EndI &&
AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
- !AFL.isSVECalleeSave(AfterGPRSavesI)) {
+ !isSVECalleeSave(AfterGPRSavesI)) {
if (CombineSPBump &&
// Only fix-up frame-setup load/store instructions.
- (!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
- AFL.fixupCalleeSaveRestoreStackOffset(
- *AfterGPRSavesI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+ (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
+ fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
+ AFI->getLocalStackSize());
++AfterGPRSavesI;
}
@@ -289,17 +713,17 @@ void AArch64PrologueEmitter::emitPrologue() {
// allocated.
if (!FPAfterSVECalleeSaves) {
MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
- assert(AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
- while (AFL.isSVECalleeSave(AfterSVESavesI) &&
+ assert(isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+ while (isSVECalleeSave(AfterSVESavesI) &&
AfterSVESavesI != MBB.getFirstTerminator())
++AfterSVESavesI;
CalleeSavesEnd = AfterSVESavesI;
StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
// Allocate space for the callee saves (if any).
- AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
- false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
- MFI.hasVarSizedObjects() || LocalsSize);
+ allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects() || LocalsSize);
}
}
CFAOffset += SVECalleeSavesSize;
@@ -315,10 +739,10 @@ void AArch64PrologueEmitter::emitPrologue() {
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
- AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
- SVELocalsSize + StackOffset::getFixed(NumBytes),
- NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
- CFAOffset, MFI.hasVarSizedObjects());
+ allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
+ SVELocalsSize + StackOffset::getFixed(NumBytes),
+ EmitAsyncCFI && !HasFP, CFAOffset,
+ MFI.hasVarSizedObjects());
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -553,11 +977,10 @@ void AArch64PrologueEmitter::emitFramePointerSetup(
// Define the current CFA rule to use the provided FP.
void AArch64PrologueEmitter::emitDefineCFAWithFP(
MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
- const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
const int OffsetToFirstCalleeSaveFromFP =
AFI->getCalleeSaveBaseToFrameRecordOffset() -
AFI->getCalleeSavedStackSize();
- Register FramePtr = TRI->getFrameRegister(MF);
+ Register FramePtr = RegInfo.getFrameRegister(MF);
CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
.buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
}
@@ -571,11 +994,10 @@ void AArch64PrologueEmitter::emitWindowsStackProbe(
// Find an available register to spill the value of X15 to, if X15 is being
// used already for nest.
unsigned X15Scratch = AArch64::NoRegister;
- const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
if (llvm::any_of(MBB.liveins(),
- [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
- return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
- AArch64::X15, LiveIn.PhysReg);
+ [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
+ LiveIn.PhysReg);
})) {
X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
assert(X15Scratch != AArch64::NoRegister &&
@@ -729,9 +1151,6 @@ void AArch64PrologueEmitter::emitWindowsStackProbe(
void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
MachineBasicBlock::iterator MBBI) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
@@ -750,17 +1169,11 @@ void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
MachineBasicBlock::iterator MBBI) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = MF.getFrameInfo();
-
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
std::optional<int64_t> IncomingVGOffsetFromDefCFA;
@@ -779,15 +1192,15 @@ void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
// common denominator.
assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
MCRegister Reg = Info.getReg();
- if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+ if (!RegInfo.regNeedsCFI(Reg, Reg))
continue;
StackOffset Offset =
StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
- StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
+ StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
CFIBuilder.insertCFIInst(
- createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
+ createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
}
}
@@ -804,13 +1217,9 @@ static bool isFuncletReturnInstr(const MachineInstr &MI) {
AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL)
- : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
- Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL) {
- TII = Subtarget.getInstrInfo();
- AFI = MF.getInfo<AArch64FunctionInfo>();
-
- NeedsWinCFI = AFL.needsWinCFI(MF);
+ : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
SEHEpilogueStartI = MBB.end();
}
@@ -845,7 +1254,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
if (MF.hasEHFunclets())
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
- if (AFL.homogeneousPrologEpilog(MF, &MBB)) {
+ if (HomPrologEpilog) {
assert(!NeedsWinCFI);
auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
if (FirstHomogenousEpilogI != MBB.begin()) {
@@ -868,8 +1277,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
bool FPAfterSVECalleeSaves =
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
- bool CombineSPBump =
- AFL.shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
+ bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
// Assume we can't combine the last pop with the sp restore.
bool CombineAfterCSRBump = false;
if (FPAfterSVECalleeSaves) {
@@ -886,9 +1294,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
// allocate more stack for arguments (in space that an untimely interrupt
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
- AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
- MachineInstr::FrameDestroy, PrologueSaveSize);
+ convertCalleeSaveRestoreToSPPrePostIncDec(
+ Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
+ PrologueSaveSize);
} else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
@@ -907,12 +1315,12 @@ void AArch64EpilogueEmitter::emitEpilogue() {
while (FirstGPRRestoreI != Begin) {
--FirstGPRRestoreI;
if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
- (!FPAfterSVECalleeSaves && AFL.isSVECalleeSave(FirstGPRRestoreI))) {
+ (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) {
++FirstGPRRestoreI;
break;
} else if (CombineSPBump)
- AFL.fixupCalleeSaveRestoreStackOffset(
- *FirstGPRRestoreI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+ fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
+ AFI->getLocalStackSize());
}
if (NeedsWinCFI) {
@@ -928,7 +1336,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
--SEHEpilogueStartI;
}
- if (AFL.hasFP(MF) && AFI->hasSwiftAsyncContext())
+ if (HasFP && AFI->hasSwiftAsyncContext())
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
const StackOffset &SVEStackSize = AFL.getSVEStackSize(MF);
@@ -938,7 +1346,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
// When we are about to restore the CSRs, the CFA register is SP again.
- if (EmitCFI && AFL.hasFP(MF))
+ if (EmitCFI && HasFP)
CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
.buildDefCFA(AArch64::SP, NumBytes);
@@ -963,12 +1371,11 @@ void AArch64EpilogueEmitter::emitEpilogue() {
RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
- AFL.isSVECalleeSave(std::prev(RestoreBegin)))
+ isSVECalleeSave(std::prev(RestoreBegin)))
--RestoreBegin;
- assert(AFL.isSVECalleeSave(RestoreBegin) &&
- AFL.isSVECalleeSave(std::prev(RestoreEnd)) &&
- "Unexpected instruction");
+ assert(isSVECalleeSave(RestoreBegin) &&
+ isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
StackOffset CalleeSavedSizeAsOffset =
StackOffset::getScalable(CalleeSavedSize);
@@ -1016,8 +1423,8 @@ void AArch64EpilogueEmitter::emitEpilogue() {
// If we have a non-zero offset to the non-SVE CS base we need to
// compute the base address by subtracting the offset in a temporary
// register first (to avoid briefly deallocating the SVE CS).
- CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
- &AArch64::GPR64RegClass);
+ CalleeSaveBase =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
MachineInstr::FrameDestroy);
@@ -1034,20 +1441,20 @@ void AArch64EpilogueEmitter::emitEpilogue() {
emitFrameOffset(
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
- false, NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+ false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
NumBytes = 0;
}
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
SVEStackSize +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI, &HasWinCFI, EmitCFI && !AFL.hasFP(MF),
+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
DeallocateAfter +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
}
@@ -1055,7 +1462,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
emitCalleeSavedSVERestores(RestoreEnd);
}
- if (!AFL.hasFP(MF)) {
+ if (!HasFP) {
bool RedZone = AFL.canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
// stack pointer (but we may need to pop stack args for fastcc).
@@ -1100,7 +1507,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
// When we are about to restore the CSRs, the CFA register is SP again.
- if (EmitCFI && AFL.hasFP(MF))
+ if (EmitCFI && HasFP)
CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
.buildDefCFA(AArch64::SP, PrologueSaveSize);
@@ -1119,6 +1526,39 @@ void AArch64EpilogueEmitter::emitEpilogue() {
}
}
+bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
+ uint64_t StackBumpBytes) const {
+ if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
+ StackBumpBytes))
+ return false;
+ if (MBB.empty())
+ return true;
+
+ // Disable combined SP bump if the last instruction is an MTE tag store. It
+ // is almost always better to merge SP adjustment into those instructions.
+ MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
+ MachineBasicBlock::iterator Begin = MBB.begin();
+ while (LastI != Begin) {
+ --LastI;
+ if (LastI->isTransient())
+ continue;
+ if (!LastI->getFlag(MachineInstr::FrameDestroy))
+ break;
+ }
+ switch (LastI->getOpcode()) {
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
+ return false;
+ default:
+ return true;
+ }
+ llvm_unreachable("unreachable");
+}
+
void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
@@ -1174,8 +1614,6 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
if (CSI.empty())
return;
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
for (const auto &Info : CSI) {
@@ -1184,8 +1622,7 @@ void AArch64EpilogueEmitter::emitCalleeSavedRestores(
continue;
MCRegister Reg = Info.getReg();
- if (SVE &&
- !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
+ if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
continue;
CFIBuilder.buildRestore(Info.getReg());
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
index 20bbffcdb33f2..a1c9b34a77c3f 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h
@@ -22,15 +22,65 @@
namespace llvm {
+class TargetLowering;
class AArch64Subtarget;
class AArch64FunctionInfo;
class AArch64FrameLowering;
+class AArch64PrologueEpilogueCommon {
+public:
+ AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL);
+
+protected:
+ bool requiresGetVGCall() const;
+
+ bool isVGInstruction(MachineBasicBlock::iterator MBBI,
+ const TargetLowering &TLI) const;
+
+ // Convert callee-save register save/restore instruction to do stack pointer
+ // decrement/increment to allocate/deallocate the callee-save stack area by
+ // converting store/load to use pre/post increment version.
+ MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
+ bool EmitCFI, MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
+ int CFAOffset = 0) const;
+
+ // Fixup callee-save register save/restore instructions to take into account
+ // combined SP bump by adding the local stack size to the stack offsets.
+ void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
+ uint64_t LocalStackSize) const;
+
+ bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
+
+ MachineFunction &MF;
+ MachineBasicBlock &MBB;
+
+ const MachineFrameInfo &MFI;
+ const AArch64Subtarget &Subtarget;
+ const AArch64FrameLowering &AFL;
+ const AArch64RegisterInfo &RegInfo;
+
+ // Common flags. These generally should not change outside of the (possibly
+ // derived) constructor.
+ bool HasFP = false;
+ bool EmitCFI = false; // Note: Set in derived constructors.
+ bool IsFunclet = false; // Note: Set in derived constructors.
+ bool NeedsWinCFI = false; // Note: Can be changed in emitFramePointerSetup.
+ bool HomPrologEpilog = false; // Note: Set in derived constructors.
+
+ // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
+ mutable bool HasWinCFI = false;
+
+ const TargetInstrInfo *TII = nullptr;
+ AArch64FunctionInfo *AFI = nullptr;
+};
+
/// A helper class for emitting the prologue. Substantial new functionality
/// should be factored into a new method. Where possible "emit*" methods should
/// be const, and any flags that change how the prologue is emitted should be
/// set in the constructor.
-class AArch64PrologueEmitter {
+class AArch64PrologueEmitter final : public AArch64PrologueEpilogueCommon {
public:
AArch64PrologueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL);
@@ -46,6 +96,11 @@ class AArch64PrologueEmitter {
}
private:
+ void allocateStackSpace(MachineBasicBlock::iterator MBBI,
+ int64_t RealignmentPadding, StackOffset AllocSize,
+ bool EmitCFI, StackOffset InitialOffset,
+ bool FollowupAllocs);
+
void emitShadowCallStackPrologue(MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const;
@@ -71,14 +126,7 @@ class AArch64PrologueEmitter {
void determineLocalsStackSize(uint64_t StackSize, uint64_t PrologueSaveSize);
- MachineFunction &MF;
- MachineBasicBlock &MBB;
-
const Function &F;
- const MachineFrameInfo &MFI;
- const AArch64Subtarget &Subtarget;
- const AArch64FrameLowering &AFL;
- const AArch64RegisterInfo &RegInfo;
#ifndef NDEBUG
mutable LivePhysRegs LiveRegs{RegInfo};
@@ -89,29 +137,16 @@ class AArch64PrologueEmitter {
#endif
// Prologue flags. These generally should not change outside of the
- // constructor. Two exceptions are "CombineSPBump" which is set in
- // determineLocalsStackSize, and "NeedsWinCFI" which is set in
- // emitFramePointerSetup.
- bool EmitCFI = false;
+ // constructor.
bool EmitAsyncCFI = false;
- bool HasFP = false;
- bool IsFunclet = false;
- bool CombineSPBump = false;
- bool HomPrologEpilog = false;
- bool NeedsWinCFI = false;
-
- // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
- mutable bool HasWinCFI = false;
-
- const TargetInstrInfo *TII = nullptr;
- AArch64FunctionInfo *AFI = nullptr;
+ bool CombineSPBump = false; // Note: This is set in determineLocalsStackSize.
};
/// A helper class for emitting the epilogue. Substantial new functionality
/// should be factored into a new method. Where possible "emit*" methods should
/// be const, and any flags that change how the epilogue is emitted should be
/// set in the constructor.
-class AArch64EpilogueEmitter {
+class AArch64EpilogueEmitter final : public AArch64PrologueEpilogueCommon {
public:
AArch64EpilogueEmitter(MachineFunction &MF, MachineBasicBlock &MBB,
const AArch64FrameLowering &AFL);
@@ -122,6 +157,8 @@ class AArch64EpilogueEmitter {
~AArch64EpilogueEmitter() { finalizeEpilogue(); }
private:
+ bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const;
+
void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const;
@@ -141,27 +178,8 @@ class AArch64EpilogueEmitter {
void finalizeEpilogue() const;
- MachineFunction &MF;
- MachineBasicBlock &MBB;
-
- const MachineFrameInfo &MFI;
- const AArch64Subtarget &Subtarget;
- const AArch64FrameLowering &AFL;
-
- // Epilogue flags. These generally should not change outside of the
- // constructor (or early in emitEpilogue).
- bool NeedsWinCFI = false;
- bool EmitCFI = false;
- bool IsFunclet = false;
-
- // Note: "HasWinCFI" is mutable as it can change in any "emit" function.
- mutable bool HasWinCFI = false;
-
- const TargetInstrInfo *TII = nullptr;
- AArch64FunctionInfo *AFI = nullptr;
-
- DebugLoc DL;
MachineBasicBlock::iterator SEHEpilogueStartI;
+ DebugLoc DL;
};
} // namespace llvm