[llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 15 09:20:20 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
<details>
<summary>Changes</summary>
Add support for probing for dynamic allocas (variable-size objects and outgoing stack arguments).
(This is stacked on top of https://github.com/llvm/llvm-project/pull/66524 and will be rebased as needed)
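For reference, a minimal IR sketch (not taken from the patch; names and attribute values are illustrative) of the kind of input this change targets: a variable-sized alloca in a function that requests inline stack probing via the `"probe-stack"` attribute, with an optional `"stack-probe-size"` override, matching the prologue-probing support this PR is stacked on.

```llvm
; Sketch only: the alloca size is unknown at compile time, so the SP
; decrement for it must be probed in guard-page-sized steps.
define void @dynamic_alloca(i64 %n, ptr %out) #0 {
entry:
  %buf = alloca i8, i64 %n, align 16
  store ptr %buf, ptr %out, align 8
  ret void
}

; "probe-stack"="inline-asm" requests inline probing; "stack-probe-size"
; overrides the default 4096-byte probe interval (both assumed here).
attributes #0 = { uwtable "probe-stack"="inline-asm" "stack-probe-size"="4096" }
```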
---
Patch is 128.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66525.diff
13 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+382-33)
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+23)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+142-52)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+24-3)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+92-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.h (+5)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+33-2)
- (modified) llvm/test/CodeGen/AArch64/framelayout-sve.mir (+2-2)
- (modified) llvm/test/CodeGen/AArch64/spill-stack-realignment.mir (+1-1)
- (added) llvm/test/CodeGen/AArch64/stack-probing-64k.ll (+392)
- (added) llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll (+320)
- (added) llvm/test/CodeGen/AArch64/stack-probing-sve.ll (+661)
- (added) llvm/test/CodeGen/AArch64/stack-probing.ll (+474)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 68e68449d4073b2..701fea37ef825e2 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -301,6 +301,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF);
static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static bool needsShadowCallStackPrologueEpilogue(MachineFunction &MF);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
@@ -470,6 +471,9 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
MachineBasicBlock::iterator I) const {
const AArch64InstrInfo *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const AArch64TargetLowering *TLI =
+ MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL = I->getDebugLoc();
unsigned Opc = I->getOpcode();
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
@@ -496,8 +500,24 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
// Most call frames will be allocated at the start of a function so
// this is OK, but it is a limitation that needs dealing with.
assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
- emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(Amount), TII);
+
+ if (TLI->hasInlineStackProbe(MF) &&
+ -Amount >= AArch64::StackProbeMaxUnprobedStack) {
+ // When stack probing is enabled, the decrement of SP may need to be
+ // probed. We only need to do this if the call site needs 1024 bytes of
+ // space or more, because a region smaller than that is allowed to be
+ // unprobed at an ABI boundary. We rely on the fact that SP has been
+ // probed exactly at this point, either by the prologue or most recent
+ // dynamic allocation.
+ assert(MFI.hasVarSizedObjects() &&
+ "non-reserved call frame without var sized objects?");
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
+ } else {
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(Amount), TII);
+ }
}
} else if (CalleePopAmount != 0) {
// If the calling convention demands that the callee pops arguments from the
@@ -672,6 +692,74 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
emitCalleeSavedRestores(MBB, MBBI, true);
}
+void AArch64FrameLowering::allocateSVEStackSpace(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ StackOffset AllocSize, StackOffset InitialOffset, bool EmitCFI) const {
+ DebugLoc DL;
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+ const AArch64TargetLowering &TLI = *Subtarget.getTargetLowering();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+ // If not probing the stack or the (unknown) allocation size is less than the
+ // probe size, decrement the stack pointer right away. This avoids having to
+ // emit a probing loop when allocating space for up to 16 SVE registers when
+ // using 4k probes.
+
+ // The bit-length of SVE registers is architecturally limited.
+ const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+ int64_t ProbeSize = TLI.getStackProbeSize(MF);
+ if (!TLI.hasInlineStackProbe(MF) ||
+ AllocSize.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE +
+ AllocSize.getFixed() <=
+ ProbeSize) {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+ InitialOffset);
+ if (TLI.hasInlineStackProbe(MF)) {
+ // Issue a probe at the top of the stack to prepare for subsequent
+ // allocations.
+ // STR XZR, [TargetReg]
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+
+ // If we can't be sure the allocation size is less than the probe size, we
+ // have to emit a stack probing loop.
+ Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(ScratchReg != AArch64::NoRegister);
+ // Get the new top of the stack into a scratch register.
+ emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
+ MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+ InitialOffset);
+ // Arrange to emit a probing loop by decrementing SP until it reaches that
+ // new top of the stack.
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR), AArch64::SP)
+ .addReg(ScratchReg);
+ // Set SP to its new value.
+ // MOV SP, Xs
+ BuildMI(MBB, MBBI, DL, TII.get(AArch64::ADDXri), AArch64::SP)
+ .addReg(ScratchReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (EmitCFI) {
+ // Set the CFA register back to SP.
+ unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+}
+
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
switch (Reg.id()) {
default:
@@ -855,9 +943,11 @@ bool AArch64FrameLowering::canUseAsPrologue(
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
- // Don't need a scratch register if we're not going to re-align the stack.
- if (!RegInfo->hasStackRealignment(*MF))
+ // Don't need a scratch register if we're not going to re-align the stack or
+ // emit stack probes.
+ if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
return true;
// Otherwise, we can use any block as long as it has a scratch register
// available.
@@ -1429,6 +1519,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
const Function &F = MF.getFunction();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const AArch64TargetLowering &TLI = *Subtarget.getTargetLowering();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -1784,12 +1875,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ StackOffset SVECalleeSavedSize = {}, SVELocalsSize = SVEStackSize;
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
// Process the SVE callee-saves to determine what space needs to be
// allocated.
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+ LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+ << "\n");
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
@@ -1797,33 +1890,40 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;
- AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
- AllocateAfter = SVEStackSize - AllocateBefore;
+ SVECalleeSavedSize = StackOffset::getScalable(CalleeSavedSize);
+ SVELocalsSize = SVEStackSize - SVECalleeSavedSize;
+
+ // Allocate space for the SVE callee saves.
+ if (SVECalleeSavedSize) {
+ allocateSVEStackSpace(
+ MBB, CalleeSavesBegin, SVECalleeSavedSize,
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes),
+ EmitAsyncCFI && !HasFP);
+ if (EmitAsyncCFI)
+ emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
+ }
}
- // Allocate space for the callee saves (if any).
- emitFrameOffset(
- MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
- MachineInstr::FrameSetup, false, false, nullptr,
- EmitAsyncCFI && !HasFP && AllocateBefore,
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
-
- if (EmitAsyncCFI)
- emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
-
- // Finally allocate remaining SVE stack space.
- emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
- -AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
- nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
- AllocateBefore + StackOffset::getFixed(
- (int64_t)MFI.getStackSize() - NumBytes));
+ // Allocate stack space for the local SVE objects.
+ if (SVELocalsSize)
+ allocateSVEStackSpace(
+ MBB, CalleeSavesEnd, SVELocalsSize,
+ SVECalleeSavedSize +
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes),
+ EmitAsyncCFI && !HasFP);
// Allocate space for the rest of the frame.
if (NumBytes) {
unsigned scratchSPReg = AArch64::SP;
+ bool NeedsStackProbe = TLI.hasInlineStackProbe(MF) &&
+ (NumBytes > AArch64::StackProbeMaxUnprobedStack ||
+ MFI.hasVarSizedObjects());
if (NeedsRealignment) {
scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
+ NeedsStackProbe |= TLI.hasInlineStackProbe(MF) &&
+ (NumBytes + MFI.getMaxAlign().value()) >
+ AArch64::StackProbeMaxUnprobedStack;
assert(scratchSPReg != AArch64::NoRegister);
}
@@ -1832,12 +1932,36 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
- emitFrameOffset(
- MBB, MBBI, DL, scratchSPReg, AArch64::SP,
- StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+ StackOffset CFAOffset =
SVEStackSize +
- StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
+ StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+ if (NeedsStackProbe && !NeedsRealignment) {
+ // If we don't need to re-align the stack, we can use a more efficient
+ // sequence for stack probing.
+ Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+ assert(ScratchReg != AArch64::NoRegister);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
+ .addDef(ScratchReg)
+ .addImm(NumBytes)
+ .addImm(CFAOffset.getFixed())
+ .addImm(CFAOffset.getScalable());
+ // The fixed allocation may leave unprobed bytes at the top of the
+ // stack. If we have variable-sized objects, we need to issue an extra
+ // probe, so their allocations start in a known state.
+ if (MFI.hasVarSizedObjects()) {
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ } else {
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI,
+ &HasWinCFI, EmitAsyncCFI && !HasFP, CFAOffset);
+ }
}
if (NeedsRealignment) {
assert(MFI.getMaxAlign() > Align(1));
@@ -1846,12 +1970,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// SUB X9, SP, NumBytes
// -- X9 is temporary register, so shouldn't contain any live data here,
// -- free to use. This is already produced by emitFrameOffset above.
- // AND SP, X9, 0b11111...0000
- uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
- .addReg(scratchSPReg, RegState::Kill)
- .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
+ const uint64_t MaxAlign = MFI.getMaxAlign().value();
+ const uint64_t AndMask = ~(MaxAlign - 1);
+
+ if (NeedsStackProbe) {
+ // If allocation size is known to not exceed the probe size, don't emit
+ // a probing loop.
+ if (NumBytes + MaxAlign - 1 <= TLI.getStackProbeSize(MF)) {
+ // AND SP, X9, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ // STR XZR, [SP]
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ } else {
+ // AND X9, X9, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), scratchSPReg)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR),
+ AArch64::SP)
+ .addReg(scratchSPReg);
+ // MOV SP, X9
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::SP)
+ .addReg(scratchSPReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ } else {
+ // AND SP, X9, 0b11111...0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
AFI->setStackRealigned(true);
// No need for SEH instructions here; if we're realigning the stack,
@@ -4057,3 +4217,192 @@ void AArch64FrameLowering::orderFrameObjects(
dbgs() << "\n";
});
}
+
+/// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
+/// least every ProbeSize bytes. Returns an iterator of the first instruction
+/// after the loop. The difference between SP and TargetReg must be an exact
+/// multiple of ProbeSize.
+MachineBasicBlock::iterator
+AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
+ MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
+ Register TargetReg) const {
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+
+ // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
+ // in SUB).
+ emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup);
+ // STR XZR, [SP]
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ // CMP SP, TargetReg
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(MachineInstr::FrameSetup);
+ // B.CC Loop
+ BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::NE)
+ .addMBB(LoopMBB)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ MBB.addSuccessor(LoopMBB);
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
+
+ return ExitMBB->begin();
+}
+
+MachineBasicBlock::iterator AArch64FrameLowering::inlineStackProbeFixed(
+ MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
+ StackOffset CFAOffset) const {
+ MachineBasicBlock *MBB = MBBI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ const AArch64TargetLowering *TLI =
+ MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ bool HasFP = hasFP(MF);
+
+ DebugLoc DL;
+ int64_t ProbeSize = TLI->getStackProbeSize(MF);
+ int64_t NumBlocks = FrameSize / ProbeSize;
+ int64_t ResidualSize = FrameSize % ProbeSize;
+
+ LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
+ << NumBlocks << " blocks of " << ProbeSize
+ << " bytes, plus " << ResidualSize << " bytes\n");
+
+ // Decrement SP by NumBlocks * ProbeSize bytes, with either an unrolled or an
+ // ordinary loop.
+ if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
+ for (int i = 0; i < NumBlocks; ++i) {
+ // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
+ // encodable in a SUB).
+ emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += StackOffset::getFixed(ProbeSize);
+ // STR XZR, [SP]
+ BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ } else if (NumBlocks != 0) {
+ // SUB ScratchReg, SP, #(ProbeSize * NumBlocks) (or equivalent if not
+ // encodable in a SUB). ScratchReg may temporarily become the CFA register.
+ emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
+ StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
+ MachineInstr::FrameSetup, false, false, nullptr,
+ EmitAsyncCFI && !HasFP, CFAOffset);
+ CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
+ MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
+ MBB = MBBI->getParent();
+ if (EmitAsyncCFI && !HasFP) {
+ // Set the CFA register back to SP.
+ const AArch64RegisterInfo &RegInfo =
+ *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
+ unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
+...
[truncated]
``````````
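As a rough illustration of the `eliminateCallFramePseudoInstr` change above (names and sizes here are invented, not from the patch): when a function has variable-sized objects the call frame is not reserved in the prologue, so an SP decrement of `StackProbeMaxUnprobedStack` (1024) bytes or more at a call site must itself be probed. One IR shape that could exercise that path:

```llvm
; Hypothetical example: the dynamic alloca makes the call frame
; non-reserved, and the large byval argument makes the outgoing
; argument area big enough that its SP decrement needs probing.
%large = type { [2048 x i8] }

declare void @consume(ptr byval(%large) align 8)

define void @outgoing_args(i64 %n) #0 {
entry:
  %copy = alloca %large, align 8
  %scratch = alloca i8, i64 %n, align 16   ; variable-sized object
  call void @consume(ptr byval(%large) align 8 %copy)
  ret void
}

attributes #0 = { uwtable "probe-stack"="inline-asm" }
```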
</details>
https://github.com/llvm/llvm-project/pull/66525