[clang] [AArch64] Stack probing for function prologues (PR #66524)

Oskar Wirga via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 26 18:32:10 PDT 2023


================
@@ -9460,6 +9461,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
   return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
 }
 
+MachineBasicBlock::iterator
+AArch64InstrInfo::insertStackProbingLoop(MachineBasicBlock::iterator MBBI,
+                                         Register ScratchReg,
+                                         Register TargetReg) const {
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64InstrInfo *TII =
+      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+  int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
+  DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+  MachineBasicBlock *LoopTestMBB =
+      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopTestMBB);
+  MachineBasicBlock *LoopBodyMBB =
+      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopBodyMBB);
+  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, ExitMBB);
+
+  // LoopTest:
+  //   SUB ScratchReg, ScratchReg, #ProbeSize
+  emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, ScratchReg, ScratchReg,
+                  StackOffset::getFixed(-ProbeSize), TII,
+                  MachineInstr::FrameSetup);
+
+  //   CMP ScratchReg, TargetReg
+  AArch64CC::CondCode Cond = AArch64CC::LE;
+  Register Op1 = ScratchReg;
+  Register Op2 = TargetReg;
+  if (Op2 == AArch64::SP) {
+    assert(Op1 != AArch64::SP && "At most one of the registers can be SP");
+    // CMP TargetReg, ScratchReg
+    std::swap(Op1, Op2);
+    Cond = AArch64CC::GT;
+  }
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
+          AArch64::XZR)
+      .addReg(Op1)
+      .addReg(Op2)
+      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+      .setMIFlags(MachineInstr::FrameSetup);
+
+  //   B.<Cond> LoopExit
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
+      .addImm(Cond)
+      .addMBB(ExitMBB)
+      .setMIFlags(MachineInstr::FrameSetup);
+
+  //   STR XZR, [ScratchReg]
+  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
+      .addReg(AArch64::XZR)
+      .addReg(ScratchReg)
+      .addImm(0)
+      .setMIFlags(MachineInstr::FrameSetup);
+
+  //   B loop
+  BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
+      .addMBB(LoopTestMBB)
+      .setMIFlags(MachineInstr::FrameSetup);
+
+  // LoopExit:
+  //   STR XZR, [TargetReg]
+  BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui))
+      .addReg(AArch64::XZR)
+      .addReg(TargetReg)
+      .addImm(0)
+      .setMIFlags(MachineInstr::FrameSetup);
----------------
oskarwirga wrote:

> Can you spot a place where the probe instruction is not immediately after a decrement of the stack (disregarding some random register-to-register arithmetic that may appear)?

This was the thread that led me to understand what is happening:
```
sub     sp, sp, #0x1, lsl #0xc
cmp     sp, x1
b.le    0x5555557388
str     xzr, [x1]  {0x0}
```

We are probing the _old_ stack head! `x1` contains `0x7fffffee80` but `sp` is at `0x7fffffde80`, i.e. `0x1000` lower! This means that selecting the `x1` register instead of `sp` as the base address of the probe is incorrect.

I confirmed this to be the case by fixing this probe here and testing again.

```suggestion
  //   STR XZR, [ScratchReg]
  BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(AArch64::STRXui))
      .addReg(AArch64::XZR)
      .addReg(ScratchReg)
      .addImm(0)
      .setMIFlags(MachineInstr::FrameSetup);
```

https://github.com/llvm/llvm-project/pull/66524


More information about the cfe-commits mailing list