[llvm] [AArch64] Stack probing for function prologues (PR #66524)
Oskar Wirga via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 13:45:49 PST 2023
================
@@ -9461,6 +9462,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
+MachineBasicBlock::iterator
+AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
+ Register TargetReg, bool FrameSetup) const {
+ assert(TargetReg != AArch64::SP && "New top of stack cannot aleady be in SP");
+
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *LoopBodyMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopBodyMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+ MachineInstr::MIFlag Flags =
+ FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
+
+ // LoopTest:
+ // SUB SP, SP, #ProbeSize
+ emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
+ AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
+
+ // CMP SP, TargetReg
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(Flags);
+
+ // B.<Cond> LoopExit
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::LE)
+ .addMBB(ExitMBB)
+ .setMIFlags(Flags);
+
+ // STR XZR, [SP]
+ BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(Flags);
+
+ // B loop
+ BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
+ .addMBB(LoopTestMBB)
+ .setMIFlags(Flags);
+
+ // LoopExit:
+ // MOV SP, TargetReg
+ BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
+ .addReg(TargetReg)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlags(Flags);
+
+ // STR XZR, [SP]
+ BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::SP)
+ .addImm(0)
+ .setMIFlags(Flags);
----------------
oskarwirga wrote:
Thanks for that explanation, yes I see it leaves a large gap. Theres like a million optimizations and mitigations slapped on this code so it gets difficult understanding the root cause.
> I guess maybe weird things could happen if something tries to allocate 0 bytes of memory? Probably something that needs to be fixed, but it's unlikely you'd run into it from C code.
Thanks for this, it made me look into it a bit more and we're not allocating 0 bytes, but we _are_ allocating 0x10 bytes which might cause some issues. I appreciate your quick reply thank you again!
https://github.com/llvm/llvm-project/pull/66524
More information about the llvm-commits
mailing list