[llvm] r234471 - [AArch64] Add support for dynamic stack alignment

Kristof Beyls kristof.beyls at arm.com
Thu Apr 9 01:49:47 PDT 2015


Author: kbeyls
Date: Thu Apr  9 03:49:47 2015
New Revision: 234471

URL: http://llvm.org/viewvc/llvm-project?rev=234471&view=rev
Log:
[AArch64] Add support for dynamic stack alignment

Differential Revision: http://reviews.llvm.org/D8876


Added:
    llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
    llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Thu Apr  9 03:49:47 2015
@@ -9,6 +9,82 @@
 //
 // This file contains the AArch64 implementation of TargetFrameLowering class.
 //
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// |                                   | Higher address
+// |-----------------------------------|
+// |                                   |
+// | arguments passed on the stack     |
+// |                                   |
+// |-----------------------------------| <- sp
+// |                                   | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until the
+// main function body, after the prologue is run. However, it's depicted here
+// for completeness.
+//
+// |                                   | Higher address
+// |-----------------------------------|
+// |                                   |
+// | arguments passed on the stack     |
+// |                                   |
+// |-----------------------------------|
+// |                                   |
+// | prev_fp, prev_lr                  |
+// | (a.k.a. "frame record")           |
+// |-----------------------------------| <- fp(=x29)
+// |                                   |
+// | other callee-saved registers      |
+// |                                   |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....|  compile time; if present)
+// |-----------------------------------|
+// |                                   |
+// | local variables of fixed size     |
+// | including spill slots             |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....|       LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................|  compile time)
+// |-----------------------------------| <- sp
+// |                                   | Lower address
+//
+//
+// To access the data in a frame, at compile time, a constant offset must be
+// computable from one of the pointers (fp, bp, sp) to access it. The size
+// of the areas with a dotted background cannot be computed at compile-time
+// if they are present, making it required to have all three of fp, bp and
+// sp to be set up to be able to access all contents in the frame areas,
+// assuming all of the frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+//   variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+//   more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
 //===----------------------------------------------------------------------===//
 
 #include "AArch64FrameLowering.h"
@@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarc
 
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
-static unsigned estimateStackSize(MachineFunction &MF) {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  int Offset = 0;
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -FFI->getObjectOffset(i);
-    if (FixedOff > Offset)
-      Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-    Offset += FFI->getObjectSize(i);
-    unsigned Align = FFI->getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset + Align - 1) / Align * Align;
-  }
-  // This does not include the 16 bytes used for fp and lr.
-  return (unsigned)Offset;
-}
-
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
   if (!EnableRedZone)
     return false;
@@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone
 /// pointer register.
 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-  assert(!RegInfo->needsStackRealignment(MF) &&
-         "No stack realignment on AArch64!");
-#endif
-
   return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
-          MFI->hasPatchPoint());
+          MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
 }
 
 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(
   AFI->setLocalStackSize(NumBytes);
 
   // Allocate space for the rest of the frame.
-  if (NumBytes) {
-    // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF))
-      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup);
+
+  const unsigned Alignment = MFI->getMaxAlignment();
+  const bool NeedsRealignment = (Alignment > 16);
+  unsigned scratchSPReg = AArch64::SP;
+  if (NeedsRealignment) {
+    // Use X9 (a caller-saved temporary register) as a scratch register
+    assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+           "No scratch register to align SP!");
+    scratchSPReg = AArch64::X9;
+  }
+
+  // If we're a leaf function, try using the red zone.
+  if (NumBytes && !canUseRedZone(MF))
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
+    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+                    MachineInstr::FrameSetup);
+
+  assert(!(NeedsRealignment && NumBytes==0) &&
+         "NumBytes should never be 0 when realignment is needed");
+
+  if (NumBytes && NeedsRealignment) {
+    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+    assert(NrBitsToZero > 1);
+    assert(scratchSPReg != AArch64::SP);
+
+    // SUB X9, SP, NumBytes
+    //   -- X9 is a temporary register, so shouldn't contain any live data here,
+    //   -- free to use. This is already produced by emitFrameOffset above.
+    // AND SP, X9, 0b11111...0000
+    // The logical immediates have a non-trivial encoding. The following
+    // formula computes the encoded immediate with all ones but
+    // NrBitsToZero zero bits as least significant bits.
+    uint32_t andMaskEncoded =
+        (1                   <<12) // = N
+      | ((64-NrBitsToZero)   << 6) // immr
+      | ((64-NrBitsToZero-1) << 0) // imms
+      ;
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+      .addReg(scratchSPReg, RegState::Kill)
+      .addImm(andMaskEncoded);
   }
 
   // If we need a base pointer, set it up here. It's whatever the value of the
@@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(
   // FIXME: Clarify FrameSetup flags here.
   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
   // needed.
-  //
-  if (RegInfo->hasBasePointer(MF))
-    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+  if (RegInfo->hasBasePointer(MF)) {
+    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+                     false);
+  }
 
   if (needsFrameMoves) {
     const DataLayout *TD = MF.getTarget().getDataLayout();
     const int StackGrowth = -TD->getPointerSize(0);
     unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
     // An example of the prologue:
     //
     //     .globl __foo
@@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(
   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     return;
 
-  // Initial and residual are named for consitency with the prologue. Note that
+  // Initial and residual are named for consistency with the prologue. Note that
   // in the epilogue, the residual adjustment is executed first.
   uint64_t ArgumentPopSize = 0;
   if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
@@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIn
   bool isFixed = MFI->isFixedObjectIndex(FI);
 
   // Use frame pointer to reference fixed objects. Use it for locals if
-  // there are VLAs (and thus the SP isn't reliable as a base).
-  // Make sure useFPForScavengingIndex() does the right thing for the emergency
-  // spill slot.
+  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+  // reliable as a base). Make sure useFPForScavengingIndex() does the
+  // right thing for the emergency spill slot.
   bool UseFP = false;
   if (AFI->hasStackFrame()) {
     // Note: Keeping the following as multiple 'if' statements rather than
@@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIn
     // Argument access should always use the FP.
     if (isFixed) {
       UseFP = hasFP(MF);
-    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+               !RegInfo->needsStackRealignment(MF)) {
       // Use SP or FP, whichever gives us the best chance of the offset
       // being in range for direct access. If the FPOffset is positive,
       // that'll always be best, as the SP will be even further away.
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIn
     }
   }
 
+  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+         "In the presence of dynamic stack pointer realignment, "
+         "non-argument objects cannot be accessed through the frame pointer");
+
   if (UseFP) {
     FrameReg = RegInfo->getFrameRegister(MF);
     return FPOffset;
@@ -794,6 +886,9 @@ void AArch64FrameLowering::processFuncti
   if (RegInfo->hasBasePointer(MF))
     MRI->setPhysRegUsed(RegInfo->getBaseRegister());
 
+  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+    MRI->setPhysRegUsed(AArch64::X9);
+
   // If any callee-saved registers are used, the frame cannot be eliminated.
   unsigned NumGPRSpilled = 0;
   unsigned NumFPRSpilled = 0;
@@ -867,7 +962,8 @@ void AArch64FrameLowering::processFuncti
   // The CSR spill slots have not been allocated yet, so estimateStackSize
   // won't include them.
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+  unsigned CFSize =
+      MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
   bool BigStack = (CFSize >= 256);
   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h Thu Apr  9 03:49:47 2015
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public Targ
 public:
   explicit AArch64FrameLowering()
       : TargetFrameLowering(StackGrowsDown, 16, 0, 16,
-                            false /*StackRealignable*/) {}
+                            true /*StackRealignable*/) {}
 
   void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,

Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.cpp Thu Apr  9 03:49:47 2015
@@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer
   // large enough that referencing from the FP won't result in things being
   // in range relatively often, we can use a base pointer to allow access
   // from the other direction like the SP normally works.
+  // Furthermore, if variable sized objects are present and the stack also
+  // needs to be dynamically re-aligned, the base pointer is the only
+  // reliable way to reference the locals.
   if (MFI->hasVarSizedObjects()) {
+    if (needsStackRealignment(MF))
+      return true;
     // Conservatively estimate whether the negative offset from the frame
     // pointer will be sufficient to reach. If a function has a smallish
     // frame, it's less likely to have lots of spills and callee saved
@@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer
   return false;
 }
 
+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+
+  if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+    return false;
+
+  return true;
+}
+
+// FIXME: share this with other backends with identical implementation?
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const Function *F = MF.getFunction();
+  unsigned StackAlign = MF.getTarget()
+                            .getSubtargetImpl(*MF.getFunction())
+                            ->getFrameLowering()
+                            ->getStackAlignment();
+  bool requiresRealignment =
+      ((MFI->getMaxAlignment() > StackAlign) ||
+       F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                       Attribute::StackAlignment));
+
+  return requiresRealignment && canRealignStack(MF);
+}
+
 unsigned
 AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h?rev=234471&r1=234470&r2=234471&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterInfo.h Thu Apr  9 03:49:47 2015
@@ -93,6 +93,9 @@ public:
 
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const override;
+  // Base pointer (stack realignment) support.
+  bool canRealignStack(const MachineFunction &MF) const;
+  bool needsStackRealignment(const MachineFunction &MF) const override;
 };
 
 } // end namespace llvm

Added: llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll?rev=234471&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll Thu Apr  9 03:49:47 2015
@@ -0,0 +1,491 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test aims to check basic correctness of frame layout &
+; frame access code. There are 8 functions in this test file,
+; each function implements one element in the cartesian product
+; of:
+; . a function having a VLA/noVLA
+; . a function with dynamic stack realignment/no dynamic stack realignment.
+; . a function needing a frame pointer/no frame pointer,
+; since the presence/absence of these has influence on the frame
+; layout and which pointer to use to access various parts of the
+; frame (bp,sp,fp).
+;
+; Furthermore: in every test function:
+; . there is always one integer and 1 floating point argument to be able
+;   to check those are accessed correctly.
+; . there is always one local variable to check that is accessed
+;   correctly
+;
+; The LLVM-IR below was produced by clang on the following C++ code:
+;extern "C" int g();
+;extern "C" int novla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                             double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  volatile int l1;
+;  return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                             double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  volatile int l1;
+;  return i10 + (int)d10 + l1;
+;}
+;extern "C" int novla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                         double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  alignas(128) volatile int l1;
+;  return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                           double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  alignas(128) volatile int l1;
+;  return i10 + (int)d10 + l1;
+;}
+;
+;extern "C" int vla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                         double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  volatile int l1;
+;  volatile int vla[i1];
+;  return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                           double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  volatile int l1;
+;  volatile int vla[i1];
+;  return i10 + (int)d10 + l1 + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                       double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  alignas(128) volatile int l1;
+;  volatile int vla[i1];
+;  return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                         double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+;  // use an argument passed on the stack.
+;  alignas(128) volatile int l1;
+;  volatile int vla[i1];
+;  return i10 + (int)d10 + l1 + vla[0];
+;}
+
+
+
+define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  ret i32 %add2
+}
+; CHECK-LABEL: novla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #16]
+; CHECK: add	x29, sp, #16
+;   Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+;   Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [sp, #12]
+;   Check epilogue:
+; CHECK: ldp	x29, x30, [sp, #16]
+; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+declare i32 @g() #0
+
+; Function Attrs: nounwind
+define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  ret i32 %add1
+}
+; CHECK-LABEL: novla_nodynamicrealign_nocall
+;   Check that space is reserved for one local variable on the stack.
+; CHECK:	sub	sp, sp, #16             // =16
+;   Check correct access to arguments passed on the stack, through stack pointer
+; CHECK: ldr	d[[DARG:[0-9]+]], [sp, #40]
+; CHECK: ldr	w[[IARG:[0-9]+]], [sp, #24]
+;   Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [sp, #12]
+;   Check epilogue:
+; CHECK: add	sp, sp, #16             // =16
+; CHECK: ret
+
+
+define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 128
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  ret i32 %add2
+}
+
+; CHECK-LABEL: novla_dynamicrealign_call
+; CHECK: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #16]
+; CHECK: add	x29, sp, #16
+;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub	x9, sp, #96
+; CHECK: and	sp, x9, #0xffffffffffffff80
+;   Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+;   Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [sp]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: sub	sp, x29, #16            // =16
+; CHECK: ldp	x29, x30, [sp, #16]
+; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 128
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  ret i32 %add1
+}
+
+; CHECK-LABEL: novla_dynamicrealign_nocall
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #-16]!
+; CHECK: mov	x29, sp
+;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub	x9, sp, #112
+; CHECK: and	sp, x9, #0xffffffffffffff80
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+;   Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [sp]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: mov	sp, x29
+; CHECK: ldp	x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 4
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add3 = add nsw i32 %add2, %1
+  ret i32 %add3
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #16]
+; CHECK: add	x29, sp, #16
+;   Check that space is reserved on the stack for the local variable,
+;   rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub	sp, sp, #16
+;   Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx	x9, x0, #0, #32
+; CHECK: lsl	x9, x9, #2
+; CHECK: add	x9, x9, #15
+; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: mov	 x10, sp
+; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through frame pointer
+; CHECK: ldur	w[[ILOC:[0-9]+]], [x29, #-20]
+;   Check correct accessing of the VLA variable through the base pointer
+; CHECK: ldr	w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: sub	sp, x29, #16            // =16
+; CHECK: ldp	x29, x30, [sp, #16]
+; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 4
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add2 = add nsw i32 %add1, %1
+  ret i32 %add2
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_nocall
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #-16]!
+; CHECK: mov	x29, sp
+;   Check that space is reserved on the stack for the local variable,
+;   rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub	sp, sp, #16
+;   Check correctness of cfi pseudo-instructions
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx	x9, x0, #0, #32
+; CHECK: lsl	x9, x9, #2
+; CHECK: add	x9, x9, #15
+; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: mov	 x10, sp
+; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through frame pointer
+; CHECK: ldur	w[[ILOC:[0-9]+]], [x29, #-4]
+;   Check correct accessing of the VLA variable through the base pointer
+; CHECK: ldr	w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: mov    sp, x29
+; CHECK: ldp	x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 128
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add3 = add nsw i32 %add2, %1
+  ret i32 %add3
+}
+
+; CHECK-LABEL: vla_dynamicrealign_call
+; CHECK: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK: stp	x22, x21, [sp, #-48]!
+; CHECK: stp	x20, x19, [sp, #16]
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #32]
+; CHECK: add	x29, sp, #32
+;   Check that the stack pointer gets re-aligned to 128
+;   bytes & the base pointer (x19) gets initialized to
+;   this 128-byte aligned area for local variables &
+;   spill slots
+; CHECK: sub	x9, sp, #80            // =80
+; CHECK: and	sp, x9, #0xffffffffffffff80
+; CHECK: mov    x19, sp
+;   Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w21, -40
+; CHECK: .cfi_offset w22, -48
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+;   and set-up of base pointer (x19).
+; CHECK: ubfx	x9, x0, #0, #32
+; CHECK: lsl	x9, x9, #2
+; CHECK: add	x9, x9, #15
+; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: mov	 x10, sp
+; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through base pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr	 w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: sub	sp, x29, #32
+; CHECK: ldp	x29, x30, [sp, #32]
+; CHECK: ldp	x20, x19, [sp, #16]
+; CHECK: ldp	x22, x21, [sp], #48
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {  ; test input: 128-byte aligned local + VLA, no call; only %i1, %i10 and %d10 are read
+entry:
+  %l1 = alloca i32, align 128  ; fixed-size local requesting 128-byte alignment
+  %0 = zext i32 %i1 to i64  ; VLA element count: %i1 zero-extended to i64
+  %vla = alloca i32, i64 %0, align 4  ; variable-sized alloca of %0 i32 elements
+  %conv = fptosi double %d10 to i32  ; last double argument converted to a signed i32
+  %add = add nsw i32 %conv, %i10  ; combined with the last i32 argument
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128  ; volatile read of the 128-byte aligned local
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1  ; volatile read of the first VLA element
+  %add2 = add nsw i32 %add1, %1
+  ret i32 %add2  ; sum of the two arguments, the local and the VLA element
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall
+;   Check that used callee-saved registers are saved
+; CHECK: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #16]
+; CHECK: add	x29, sp, #16
+;   Check that the stack pointer gets re-aligned to 128
+;   bytes & the base pointer (x19) gets initialized to
+;   this 128-byte aligned area for local variables &
+;   spill slots
+; CHECK: sub	x9, sp, #96
+; CHECK: and	sp, x9, #0xffffffffffffff80
+; CHECK: mov    x19, sp
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+;   and set-up of base pointer (x19).
+; CHECK: ubfx	x9, x0, #0, #32
+; CHECK: lsl	x9, x9, #2
+; CHECK: add	x9, x9, #15
+; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: mov	 x10, sp
+; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through base pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr	 w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: sub	sp, x29, #16
+; CHECK: ldp	x29, x30, [sp, #16]
+; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ret
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {  ; test input: 32768-byte aligned local + VLA, no call; only %i1, %i10 and %d10 are read
+entry:
+  %l1 = alloca i32, align 32768  ; fixed-size local requesting 32768-byte alignment
+  %0 = zext i32 %i1 to i64  ; VLA element count: %i1 zero-extended to i64
+  %vla = alloca i32, i64 %0, align 4  ; variable-sized alloca of %0 i32 elements
+  %conv = fptosi double %d10 to i32  ; last double argument converted to a signed i32
+  %add = add nsw i32 %conv, %i10  ; combined with the last i32 argument
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 32768  ; volatile read of the 32768-byte aligned local
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1  ; volatile read of the first VLA element
+  %add2 = add nsw i32 %add1, %1
+  ret i32 %add2  ; sum of the two arguments, the local and the VLA element
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
+;   Check that used callee-saved registers are saved
+; CHECK: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK: stp	x29, x30, [sp, #16]
+; CHECK: add	x29, sp, #16
+;   Check that the stack pointer gets re-aligned to 32768
+;   bytes & the base pointer (x19) gets initialized to
+;   this 32768-byte aligned area for local variables &
+;   spill slots
+; CHECK: sub	x9, sp, #7, lsl #12
+; CHECK: and	sp, x9, #0xffffffffffff8000
+; CHECK: mov    x19, sp
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+;   and set-up of base pointer (x19).
+; CHECK: ubfx	x9, x0, #0, #32
+; CHECK: lsl	x9, x9, #2
+; CHECK: add	x9, x9, #15
+; CHECK: and	x9, x9, #0xfffffffffffffff0
+; CHECK: mov	 x10, sp
+; CHECK: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through base pointer
+; CHECK: ldr	w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr	 w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK: sub	sp, x29, #16
+; CHECK: ldp	x29, x30, [sp, #16]
+; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ret
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}





More information about the llvm-commits mailing list