[PATCH] ARM: support stack probe emission for Windows on ARM

Tue Apr 1 16:11:40 PDT 2014

Hi t.p.northover, rnk,

This introduces the emission of the stack probe function during stack lowering
for Windows on ARM.  The stack on Windows on ARM is a dynamically paged stack
where any allocation which crosses the page boundary into the following guard
page will cause a page fault.  This page fault must be handled by the kernel to
ensure that the page is faulted in.  If this does not occur and a write accesses
any memory beyond that, the page fault will go unserviced, resulting in an
abnormal program termination.

The watermark for the stack probe appears to be at 4080 bytes (to accommodate
the stack guard canaries and stack alignment).  Follow suit and emit the probe
at that watermark (ideally, if SSP is disabled, we would use the 4096 value
instead).

http://llvm-reviews.chandlerc.com/D3255

Files:
  docs/Extensions.rst
  lib/Target/ARM/ARMFrameLowering.cpp
  test/CodeGen/ARM/Windows/chkstk.ll

Index: docs/Extensions.rst
===================================================================

--- docs/Extensions.rst
+++ docs/Extensions.rst
@@ -159,3 +159,34 @@
   .globl Symbol2
   Symbol2:
   .long 1
+
+Target Specific Behaviour
+=========================
+
+Windows on ARM
+--------------
+
+Stack Probe Emission
+^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2012) emits stack probes
+in the following fashion:
+
+.. code-block:: gas
+
+  movw r4, #constant
+  bl __chkstk
+  sub.w sp, sp, r4
+
+However, this has the limitation of 32 MiB (±16 MiB).  In order to accommodate
+larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 4GiB
+range via a slight deviation.  It will generate an indirect jump as follows:
+
+.. code-block:: gas
+
+  movw r4, #constant
+  movw r12, :lower16:__chkstk
+  movt r12, :upper16:__chkstk
+  blx r12
+  sub.w sp, sp, r4
+
Index: lib/Target/ARM/ARMFrameLowering.cpp
===================================================================
--- lib/Target/ARM/ARMFrameLowering.cpp
+++ lib/Target/ARM/ARMFrameLowering.cpp
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -148,15 +149,16 @@
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   MachineModuleInfo &MMI = MF.getMMI();
   MCContext &Context = MMI.getContext();
+  const TargetMachine &TM = MF.getTarget();
   const MCRegisterInfo *MRI = Context.getRegisterInfo();
   const ARMBaseRegisterInfo *RegInfo =
-    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+    static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
   const ARMBaseInstrInfo &TII =
-    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+    *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
   assert(!AFI->isThumb1OnlyFunction() &&
          "This emitPrologue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
-  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned Align = TM.getFrameLowering()->getStackAlignment();
   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -186,7 +188,8 @@
         .addCFIIndex(CFIIndex);
   }
 
-  if (!AFI->hasStackFrame()) {
+  if (!AFI->hasStackFrame() &&
+      !(NumBytes >= 4080 /* 4096 */ && STI.isTargetWindows())) {
     if (NumBytes - ArgRegsSaveSize != 0) {
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
                    MachineInstr::FrameSetup);
@@ -283,6 +286,52 @@
   } else
     NumBytes = DPRCSOffset;
 
+  if (STI.isTargetWindows()) {
+    if (NumBytes >= 4080 /* 4096 */) {
+      uint32_t NumWords = NumBytes >> 2;
+
+      if (NumWords < 65536)
+        AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+                       .addImm(NumWords));
+      else
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
+          .addImm(NumWords);
+
+      switch (TM.getCodeModel()) {
+      case CodeModel::Small:
+      case CodeModel::Medium:
+      case CodeModel::Default:
+      case CodeModel::Kernel:
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
+          .addImm((unsigned)ARMCC::AL).addReg(0)
+          .addExternalSymbol("__chkstk")
+          .addReg(ARM::R4, RegState::Implicit);
+        break;
+      case CodeModel::Large:
+      case CodeModel::JITDefault: {
+        LLVMContext &Ctx = MF.getMMI().getModule()->getContext();
+        const GlobalValue *F =
+          Function::Create(FunctionType::get(Type::getVoidTy(Ctx), false),
+                           GlobalValue::AvailableExternallyLinkage, "__chkstk");
+
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
+          .addGlobalAddress(F);
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::BLX))
+          .addReg(ARM::R12, RegState::Kill)
+          .addReg(ARM::R4, RegState::Implicit);
+        break;
+      }
+      }
+
+      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
+                                          ARM::SP)
+                                  .addReg(ARM::SP, RegState::Define)
+                                  .addReg(ARM::R4, RegState::Kill)
+                                  .setMIFlags(MachineInstr::FrameSetup)));
+      NumBytes = 0;
+    }
+  }
+
   unsigned adjustedGPRCS1Size = GPRCS1Size;
   if (NumBytes) {
     // Adjust SP after all the callee-save spills.
Index: test/CodeGen/ARM/Windows/chkstk.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/Windows/chkstk.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 %s -o - \
+; RUN:  | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s
+
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -code-model=large %s -o - \
+; RUN:  | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s
+
+define arm_aapcs_vfpcc void @check_watermark() #0 {
+entry:
+  %buffer = alloca [4080 x i8], align 1
+  ret void
+}
+
+; CHECK-DEFAULT-CODE-MODEL: check_watermark:
+; CHECK-DEFAULT-CODE-MODEL: 	movw r4, #1020
+; CHECK-DEFAULT-CODE-MODEL: 	bl __chkstk
+; CHECK-DEFAULT-CODE-MODEL: 	sub.w sp, sp, r4
+
+; CHECK-LARGE-CODE-MODEL: check_watermark:
+; CHECK-LARGE-CODE-MODEL: 	movw r12, :lower16:__chkstk
+; CHECK-LARGE-CODE-MODEL: 	movt r12, :upper16:__chkstk
+; CHECK-LARGE-CODE-MODEL: 	movw r4, #1020
+; CHECK-LARGE-CODE-MODEL: 	blx r12
+; CHECK-LARGE-CODE-MODEL: 	sub.w sp, sp, r4
+
+attributes #0 = { nounwind }
+
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3255.1.patch
Type: text/x-patch
Size: 5801 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140401/6a1e2820/attachment.bin>