[llvm] r321150 - [AArch64] Implement stack probing for windows

Martin Storsjo via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 19 22:51:45 PST 2017


Author: mstorsjo
Date: Tue Dec 19 22:51:45 2017
New Revision: 321150

URL: http://llvm.org/viewvc/llvm-project?rev=321150&view=rev
Log:
[AArch64] Implement stack probing for windows

Differential Revision: https://reviews.llvm.org/D41131

Added:
    llvm/trunk/test/CodeGen/AArch64/chkstk.ll
Modified:
    llvm/trunk/docs/Extensions.rst
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp

Modified: llvm/trunk/docs/Extensions.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/Extensions.rst?rev=321150&r1=321149&r2=321150&view=diff
==============================================================================
--- llvm/trunk/docs/Extensions.rst (original)
+++ llvm/trunk/docs/Extensions.rst Tue Dec 19 22:51:45 2017
@@ -288,3 +288,31 @@ standard stack probe emission.
 
 The MSVC environment does not emit code for VLAs currently.
 
+Windows on ARM64
+----------------
+
+Stack Probe Emission
+^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2017) emits stack probes
+in the following fashion:
+
+.. code-block:: gas
+
+  mov x15, #constant
+  bl __chkstk
+  sub sp, sp, x15, lsl #4
+
+However, this has the limitation of 256 MiB (±128MiB).  In order to accommodate
+larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow an
+8GiB (±4GiB) range via a slight deviation.  It will generate an indirect jump
+as follows:
+
+.. code-block:: gas
+
+  mov x15, #constant
+  adrp x16, __chkstk
+  add x16, x16, :lo12:__chkstk
+  blr x16
+  sub sp, sp, x15, lsl #4
+

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=321150&r1=321149&r2=321150&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Dec 19 22:51:45 2017
@@ -97,6 +97,7 @@
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
@@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsProlo
   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
 }
 
+static bool windowsRequiresStackProbe(MachineFunction &MF,
+                                      unsigned StackSizeInBytes) {
+  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  if (!Subtarget.isTargetWindows())
+    return false; // Non-Windows subtargets never get a probe from this check.
+  const Function &F = MF.getFunction();
+  // Threshold defaults to 4096; a "stack-probe-size" attribute may override.
+  // TODO: When implementing stack protectors, take that into account.
+  unsigned StackProbeSize = 4096;
+  if (F.hasFnAttribute("stack-probe-size"))
+    F.getFnAttribute("stack-probe-size")
+        .getValueAsString()
+        .getAsInteger(0, StackProbeSize); // NOTE(review): result is ignored.
+  return StackSizeInBytes >= StackProbeSize; // Probe at or above threshold.
+}
+
 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
     MachineFunction &MF, unsigned StackBumpBytes) const {
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombine
 
   // 512 is the maximum immediate for stp/ldp that will be used for
   // callee-save save/restores
-  if (StackBumpBytes >= 512)
+  if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
     return false;
 
   if (MFI.hasVarSizedObjects())
@@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(
     return;
 
   int NumBytes = (int)MFI.getStackSize();
-  if (!AFI->hasStackFrame()) {
+  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
 
     // All of the stack allocation is for locals.
@@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(
                     MachineInstr::FrameSetup);
   }
 
+  if (windowsRequiresStackProbe(MF, NumBytes)) {
+    uint32_t NumWords = NumBytes >> 4;
+
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+        .addImm(NumWords)
+        .setMIFlags(MachineInstr::FrameSetup);
+
+    switch (MF.getTarget().getCodeModel()) {
+    case CodeModel::Small:
+    case CodeModel::Medium:
+    case CodeModel::Kernel:
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+          .addExternalSymbol("__chkstk")
+          .addReg(AArch64::X15, RegState::Implicit)
+          .setMIFlags(MachineInstr::FrameSetup);
+      break;
+    case CodeModel::Large:
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+          .addReg(AArch64::X16, RegState::Define)
+          .addExternalSymbol("__chkstk")
+          .addExternalSymbol("__chkstk")
+          .setMIFlags(MachineInstr::FrameSetup);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
+          .addReg(AArch64::X16, RegState::Kill)
+          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+          .setMIFlags(MachineInstr::FrameSetup);
+      break;
+    }
+
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+        .addReg(AArch64::SP, RegState::Kill)
+        .addReg(AArch64::X15, RegState::Kill)
+        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+        .setMIFlags(MachineInstr::FrameSetup);
+    NumBytes = 0;
+  }
+
   // Allocate space for the rest of the frame.
   if (NumBytes) {
     const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
@@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCall
   unsigned UnspilledCSGPR = AArch64::NoRegister;
   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
 
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+
+  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
+                                ? RegInfo->getBaseRegister()
+                                : (unsigned)AArch64::NoRegister;
+
+  unsigned SpillEstimate = SavedRegs.count();
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    unsigned PairedReg = CSRegs[i ^ 1];
+    if (Reg == BasePointerReg)
+      SpillEstimate++;
+    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
+      SpillEstimate++;
+  }
+  SpillEstimate += 2; // Conservatively include FP+LR in the estimate
+  unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
+
   // The frame record needs to be created by saving the appropriate registers
-  if (hasFP(MF)) {
+  if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
     SavedRegs.set(AArch64::FP);
     SavedRegs.set(AArch64::LR);
   }
 
-  unsigned BasePointerReg = AArch64::NoRegister;
-  if (RegInfo->hasBasePointer(MF))
-    BasePointerReg = RegInfo->getBaseRegister();
-
   unsigned ExtraCSSpill = 0;
-  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
     const unsigned Reg = CSRegs[i];
@@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCall
 
   // The CSR spill slots have not been allocated yet, so estimateStackSize
   // won't include them.
-  MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);

Added: llvm/trunk/test/CodeGen/AArch64/chkstk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/chkstk.ll?rev=321150&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/chkstk.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/chkstk.ll Tue Dec 19 22:51:45 2017
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \
+; RUN:  | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s
+
+; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \
+; RUN:  | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s
+
+define void @check_watermark() {
+entry:
+  %buffer = alloca [4096 x i8], align 1
+  ret void
+}
+
+; CHECK-DEFAULT-CODE-MODEL: check_watermark:
+; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp
+; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100
+; CHECK-DEFAULT-CODE-MODEL:     bl __chkstk
+; CHECK-DEFAULT-CODE-MODEL:     sub sp, sp, x15, lsl #4
+
+; CHECK-LARGE-CODE-MODEL: check_watermark:
+; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp
+; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100
+; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk
+; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk
+; CHECK-LARGE-CODE-MODEL:     blr x16
+; CHECK-LARGE-CODE-MODEL:     sub sp, sp, x15, lsl #4




More information about the llvm-commits mailing list