[llvm] r321150 - [AArch64] Implement stack probing for windows
Martin Storsjo via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 19 22:51:45 PST 2017
Author: mstorsjo
Date: Tue Dec 19 22:51:45 2017
New Revision: 321150
URL: http://llvm.org/viewvc/llvm-project?rev=321150&view=rev
Log:
[AArch64] Implement stack probing for windows
Differential Revision: https://reviews.llvm.org/D41131
Added:
llvm/trunk/test/CodeGen/AArch64/chkstk.ll
Modified:
llvm/trunk/docs/Extensions.rst
llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
Modified: llvm/trunk/docs/Extensions.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/Extensions.rst?rev=321150&r1=321149&r2=321150&view=diff
==============================================================================
--- llvm/trunk/docs/Extensions.rst (original)
+++ llvm/trunk/docs/Extensions.rst Tue Dec 19 22:51:45 2017
@@ -288,3 +288,31 @@ standard stack probe emission.
The MSVC environment does not emit code for VLAs currently.
+Windows on ARM64
+----------------
+
+Stack Probe Emission
+^^^^^^^^^^^^^^^^^^^^
+
+The reference implementation (Microsoft Visual Studio 2017) emits stack probes
+in the following fashion:
+
+.. code-block:: gas
+
+ mov x15, #constant
+ bl __chkstk
+ sub sp, sp, x15, lsl #4
+
+However, this has the limitation of 256 MiB (±128MiB). In order to accommodate
+larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 8GiB
+(±4GiB) range via a slight deviation. It will generate an indirect jump as
+follows:
+
+.. code-block:: gas
+
+ mov x15, #constant
+ adrp x16, __chkstk
+ add x16, x16, :lo12:__chkstk
+ blr x16
+ sub sp, sp, x15, lsl #4
+
Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=321150&r1=321149&r2=321150&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Dec 19 22:51:45 2017
@@ -97,6 +97,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsProlo
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}
+static bool windowsRequiresStackProbe(MachineFunction &MF,
+ unsigned StackSizeInBytes) {
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (!Subtarget.isTargetWindows())
+ return false;
+ const Function &F = MF.getFunction();
+ // TODO: When implementing stack protectors, take that into account
+ // for the probe threshold.
+ unsigned StackProbeSize = 4096;
+ if (F.hasFnAttribute("stack-probe-size"))
+ F.getFnAttribute("stack-probe-size")
+ .getValueAsString()
+ .getAsInteger(0, StackProbeSize);
+ return StackSizeInBytes >= StackProbeSize;
+}
+
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, unsigned StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombine
// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
- if (StackBumpBytes >= 512)
+ if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
return false;
if (MFI.hasVarSizedObjects())
@@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(
return;
int NumBytes = (int)MFI.getStackSize();
- if (!AFI->hasStackFrame()) {
+ if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
// All of the stack allocation is for locals.
@@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(
MachineInstr::FrameSetup);
}
+ if (windowsRequiresStackProbe(MF, NumBytes)) {
+ uint32_t NumWords = NumBytes >> 4;
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addExternalSymbol("__chkstk")
+ .addReg(AArch64::X15, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ case CodeModel::Large:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+ .addReg(AArch64::X16, RegState::Define)
+ .addExternalSymbol("__chkstk")
+ .addExternalSymbol("__chkstk")
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
+ .addReg(AArch64::X16, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+ .addReg(AArch64::SP, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+ .setMIFlags(MachineInstr::FrameSetup);
+ NumBytes = 0;
+ }
+
// Allocate space for the rest of the frame.
if (NumBytes) {
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
@@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCall
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+
+ unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
+ ? RegInfo->getBaseRegister()
+ : (unsigned)AArch64::NoRegister;
+
+ unsigned SpillEstimate = SavedRegs.count();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ unsigned PairedReg = CSRegs[i ^ 1];
+ if (Reg == BasePointerReg)
+ SpillEstimate++;
+ if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
+ SpillEstimate++;
+ }
+ SpillEstimate += 2; // Conservatively include FP+LR in the estimate
+ unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
+
// The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF)) {
+ if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
SavedRegs.set(AArch64::FP);
SavedRegs.set(AArch64::LR);
}
- unsigned BasePointerReg = AArch64::NoRegister;
- if (RegInfo->hasBasePointer(MF))
- BasePointerReg = RegInfo->getBaseRegister();
-
unsigned ExtraCSSpill = 0;
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
@@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCall
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
Added: llvm/trunk/test/CodeGen/AArch64/chkstk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/chkstk.ll?rev=321150&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/chkstk.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/chkstk.ll Tue Dec 19 22:51:45 2017
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \
+; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s
+
+; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \
+; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s
+
+define void @check_watermark() {
+entry:
+ %buffer = alloca [4096 x i8], align 1
+ ret void
+}
+
+; CHECK-DEFAULT-CODE-MODEL: check_watermark:
+; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp
+; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100
+; CHECK-DEFAULT-CODE-MODEL: bl __chkstk
+; CHECK-DEFAULT-CODE-MODEL: sub sp, sp, x15, lsl #4
+
+; CHECK-LARGE-CODE-MODEL: check_watermark:
+; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp
+; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100
+; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk
+; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk
+; CHECK-LARGE-CODE-MODEL: blr x16
+; CHECK-LARGE-CODE-MODEL: sub sp, sp, x15, lsl #4
More information about the llvm-commits
mailing list