[clang] [AArch64] Stack probing for function prologues (PR #66524)
Oskar Wirga via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 26 18:32:10 PDT 2023
================
@@ -9460,6 +9461,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
+MachineBasicBlock::iterator
+AArch64InstrInfo::insertStackProbingLoop(MachineBasicBlock::iterator MBBI,
+ Register ScratchReg,
+ Register TargetReg) const {
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+ int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *LoopTestMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopTestMBB);
+ MachineBasicBlock *LoopBodyMBB =
+ MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, LoopBodyMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+ MF.insert(MBBInsertPoint, ExitMBB);
+
+ // LoopTest:
+ // SUB ScratchReg, ScratchReg, #ProbeSize
+ emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg,
+ StackOffset::getFixed(-ProbeSize), TII,
+ MachineInstr::FrameSetup);
+
+ // CMP ScratchReg, TargetReg
+ AArch64CC::CondCode Cond = AArch64CC::LE;
+ Register Op1 = ScratchReg;
+ Register Op2 = TargetReg;
+ if (Op2 == AArch64::SP) {
+ assert(Op1 != AArch64::SP && "At most one of the registers can be SP");
+ // CMP TargetReg, ScratchReg
+ std::swap(Op1, Op2);
+ Cond = AArch64CC::GT;
+ }
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+ AArch64::XZR)
+ .addReg(Op1)
+ .addReg(Op2)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // B.<Cond> LoopExit
+ BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
+ .addImm(Cond)
+ .addMBB(ExitMBB)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // STR XZR, [ScratchReg]
+ BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(ScratchReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // B loop
+ BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
+ .addMBB(LoopTestMBB)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ // LoopExit:
+ // STR XZR, [TargetReg]
+ BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui))
+ .addReg(AArch64::XZR)
+ .addReg(TargetReg)
+ .addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
----------------
oskarwirga wrote:
> Can you spot a place where the probe instruction is not immediately after a decrement of the stack (disregarding some random register-to-register arithmetic that may appear)?
This was the thread that led me to understand what is happening:
```
sub sp, sp, #0x1, lsl #0xc
cmp sp, x1
b.le 0x5555557388
str xzr, [x1] {0x0}
```
We are probing the _old_ stack head! `x1` contains `0x7fffffee80` but `sp` is at `0x7fffffde80`! This means that the selection of the `x1` register instead of `sp` is incorrect.
I confirmed this to be the case by fixing this probe here and testing again.
```suggestion
// STR XZR, [ScratchReg]
BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui))
.addReg(AArch64::XZR)
.addReg(ScratchReg)
.addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
```
https://github.com/llvm/llvm-project/pull/66524
More information about the cfe-commits mailing list