[llvm] r234471 - [AArch64] Add support for dynamic stack alignment
Kristof Beyls
kristof.beyls at arm.com
Thu Apr 9 01:49:47 PDT 2015
Author: kbeyls
Date: Thu Apr 9 03:49:47 2015
New Revision: 234471
URL: http://llvm.org/viewvc/llvm-project?rev=234471&view=rev
Log:
[AArch64] Add support for dynamic stack alignment
Differential Revision: http://reviews.llvm.org/D8876
Added:
llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h
Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Thu Apr 9 03:49:47 2015
@@ -9,6 +9,82 @@
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until in the
+// main function body, after the prologue is run. However, it's depicted here
+// for completeness.
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------|
+// | |
+// | prev_fp, prev_lr |
+// | (a.k.a. "frame record") |
+// |-----------------------------------| <- fp(=x29)
+// | |
+// | other callee-saved registers |
+// | |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....| compile time; if present)
+// |-----------------------------------|
+// | |
+// | local variables of fixed size |
+// | including spill slots |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....| LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................| compile time)
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// To access the data in a frame, at-compile time, a constant offset must be
+// computable from one of the pointers (fp, bp, sp) to access it. The size
+// of the areas with a dotted background cannot be computed at compile-time
+// if they are present, making it required to have all three of fp, bp and
+// sp to be set up to be able to access all contents in the frame areas,
+// assuming all of the frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitly needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitly needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarc
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on AArch64!");
-#endif
-
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
- MFI->hasPatchPoint());
+ MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+
+ const unsigned Alignment = MFI->getMaxAlignment();
+ const bool NeedsRealignment = (Alignment > 16);
+ unsigned scratchSPReg = AArch64::SP;
+ if (NeedsRealignment) {
+ // Use the first callee-saved register as a scratch register
+ assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+ "No scratch register to align SP!");
+ scratchSPReg = AArch64::X9;
+ }
+
+ // If we're a leaf function, try using the red zone.
+ if (NumBytes && !canUseRedZone(MF))
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+
+ assert(!(NeedsRealignment && NumBytes==0) &&
+ "NumBytes should never be 0 when realignment is needed");
+
+ if (NumBytes && NeedsRealignment) {
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(NrBitsToZero > 1);
+ assert(scratchSPReg != AArch64::SP);
+
+ // SUB X9, SP, NumBytes
+ // -- X9 is temporary register, so shouldn't contain any live data here,
+ // -- free to use. This is already produced by emitFrameOffset above.
+ // AND SP, X9, 0b11111...0000
+ // The logical immediates have a non-trivial encoding. The following
+ // formula computes the encoded immediate with all ones but
+ // NrBitsToZero zero bits as least significant bits.
+ uint32_t andMaskEncoded =
+ (1 <<12) // = N
+ | ((64-NrBitsToZero) << 6) // immr
+ | ((64-NrBitsToZero-1) << 0) // imms
+ ;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(andMaskEncoded);
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+ if (RegInfo->hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+ false);
+ }
if (needsFrameMoves) {
const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
// An example of the prologue:
//
// .globl __foo
@@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- // Initial and residual are named for consitency with the prologue. Note that
+ // Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
@@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIn
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
+ // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+ // reliable as a base). Make sure useFPForScavengingIndex() does the
+ // right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
@@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIn
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+ !RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIn
}
}
+ assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument objects cannot be accessed through the frame pointer");
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -794,6 +886,9 @@ void AArch64FrameLowering::processFuncti
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+ if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+ MRI->setPhysRegUsed(AArch64::X9);
+
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
@@ -867,7 +962,8 @@ void AArch64FrameLowering::processFuncti
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize =
+ MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h Thu Apr 9 03:49:47 2015
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public Targ
public:
explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/) {}
+ true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp Thu Apr 9 03:49:47 2015
@@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ // Furthermore, if both variable sized objects are present, and the
+ // stack needs to be dynamically re-aligned, the base pointer is the only
+ // reliable way to reference the locals.
if (MFI->hasVarSizedObjects()) {
+ if (needsStackRealignment(MF))
+ return true;
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer
return false;
}
+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+// FIXME: share this with other backends with identical implementation?
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl(*MF.getFunction())
+ ->getFrameLowering()
+ ->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h Thu Apr 9 03:49:47 2015
@@ -93,6 +93,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+ // Base pointer (stack realignment) support.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
Added: llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll?rev=234471&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll Thu Apr 9 03:49:47 2015
@@ -0,0 +1,491 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test aims to check basic correctness of frame layout &
+; frame access code. There are 8 functions in this test file,
+; each function implements one element in the cartesian product
+; of:
+; . a function having a VLA/noVLA
+; . a function with dynamic stack realignment/no dynamic stack realignment.
+; . a function needing a frame pionter/no frame pointer,
+; since the presence/absence of these has influence on the frame
+; layout and which pointer to use to access various part of the
+; frame (bp,sp,fp).
+;
+; Furthermore: in every test function:
+; . there is always one integer and 1 floating point argument to be able
+; to check those are accessed correctly.
+; . there is always one local variable to check that is accessed
+; correctly
+;
+; The LLVM-IR below was produced by clang on the following C++ code:
+;extern "C" int g();
+;extern "C" int novla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;extern "C" int novla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;
+;extern "C" int vla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+
+
+
+define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ ret i32 %add2
+}
+; CHECK-LABEL: novla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+declare i32 @g() #0
+
+; Function Attrs: nounwind
+define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ ret i32 %add1
+}
+; CHECK-LABEL: novla_nodynamicrealign_nocall
+; Check that space is reserved for one local variable on the stack.
+; CHECK: sub sp, sp, #16 // =16
+; Check correct access to arguments passed on the stack, through stack pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [sp, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [sp, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: add sp, sp, #16 // =16
+; CHECK: ret
+
+
+define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 128
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ ret i32 %add2
+}
+
+; CHECK-LABEL: novla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 128
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ ret i32 %add1
+}
+
+; CHECK-LABEL: novla_dynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #112
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 4
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add3 = add nsw i32 %add2, %1
+ ret i32 %add3
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-20]
+; Check correct accessing of the VLA variable through the base pointer
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 4
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4]
+; Check correct accessing of the VLA variable through the base pointer
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 128
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add3 = add nsw i32 %add2, %1
+ ret i32 %add3
+}
+
+; CHECK-LABEL: vla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #32]
+; CHECK: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #80 // =80
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w21, -40
+; CHECK: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: sub sp, x29, #32
+; CHECK: ldp x29, x30, [sp, #32]
+; CHECK: ldp x20, x19, [sp, #16]
+; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 128
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 32768
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 32768
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #7, lsl #12
+; CHECK: and sp, x9, #0xffffffffffff8000
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer get restored from frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
More information about the llvm-commits
mailing list