[clang] [llvm] [AArch64] Stack probing for function prologues (PR #66524)

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 16:41:13 PST 2023


================
@@ -672,6 +673,74 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
   emitCalleeSavedRestores(MBB, MBBI, true);
 }
 
+void AArch64FrameLowering::allocateSVEStackSpace(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    StackOffset AllocSize, StackOffset InitialOffset, bool EmitCFI) const {
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
+  const AArch64TargetLowering &TLI = *Subtarget.getTargetLowering();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+  // If not probing the stack or the (unknown) allocation size is less than the
+  // probe size, decrement the stack pointer right away. This avoids having to
+  // emit a probing loop when allocating space for up to 16 SVE registers when
+  // using 4k probes.
+
+  // The bit-length of SVE registers is architecturally limited.
+  const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
+  int64_t ProbeSize = TLI.getStackProbeSize(MF);
+  if (!TLI.hasInlineStackProbe(MF) ||
+      AllocSize.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE +
+              AllocSize.getFixed() <=
+          ProbeSize) {
+    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -AllocSize, &TII,
+                    MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+                    InitialOffset);
+    if (TLI.hasInlineStackProbe(MF)) {
+      // Issue a probe at the top of the stack to prepare for subsequent
+      // allocations.
+      // STR XZR, [SP]
+      BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
+          .addReg(AArch64::XZR)
+          .addReg(AArch64::SP)
+          .addImm(0)
+          .setMIFlags(MachineInstr::FrameSetup);
+    }
+    return;
+  }
+
+  // If we can't be sure the allocation size is less than the probe size, we
+  // have to emit a stack probing loop.
+  Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+  assert(ScratchReg != AArch64::NoRegister);
+  // Get the new top of the stack into a scratch register.
+  emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
+                  MachineInstr::FrameSetup, false, false, nullptr, EmitCFI,
+                  InitialOffset);
+  // Arrange to emit a probing loop by decrementing SP until it reaches that
+  // new top of the stack.
+  BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR), AArch64::SP)
+      .addReg(ScratchReg);
+  // Set SP to its new value.
+  // MOV SP, Xs
+  BuildMI(MBB, MBBI, DL, TII.get(AArch64::ADDXri), AArch64::SP)
+      .addReg(ScratchReg)
+      .addImm(0)
+      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+      .setMIFlags(MachineInstr::FrameSetup);
+  if (EmitCFI) {
----------------
momchil-velikov wrote:

What if we have FP?

https://github.com/llvm/llvm-project/pull/66524


More information about the llvm-commits mailing list