[llvm-commits] [llvm] r82767 - in /llvm/trunk: include/llvm/Target/TargetFrameInfo.h lib/CodeGen/PrologEpilogInserter.cpp lib/Target/ARM/ARMFrameInfo.h test/CodeGen/ARM/2009-09-24-spill-align.ll

Bob Wilson bob.wilson at apple.com
Fri Sep 25 07:41:49 PDT 2009


Author: bwilson
Date: Fri Sep 25 09:41:49 2009
New Revision: 82767

URL: http://llvm.org/viewvc/llvm-project?rev=82767&view=rev
Log:
pr4926: ARM requires the stack pointer to be aligned, even for leaf functions.
For the AAPCS ABI, SP must always be 4-byte aligned, and at any "public
interface" it must be 8-byte aligned.  For the older ARM APCS ABI, the stack
alignment is just always 4 bytes.  For X86, we currently align SP at
entry to a function (e.g., to 16 bytes for Darwin), but no stack alignment
is needed at other times, such as for a leaf function.

After discussing this with Dan, I decided to go with the approach of adding
a new "TransientStackAlignment" field to TargetFrameInfo.  This value
specifies the stack alignment that must be maintained even in between calls.
It defaults to 1 except for ARM, where it is 4.  (Some other targets may
also want to set this if they have similar stack requirements.  Setting it is
not currently required for PPC, because PPC sets
targetHandlesStackFrameRounding and handles the alignment in target-specific
code.)  The existing StackAlignment value specifies the alignment upon entry
to a function, which is how we've been using it anyway.

Added:
    llvm/trunk/test/CodeGen/ARM/2009-09-24-spill-align.ll
Modified:
    llvm/trunk/include/llvm/Target/TargetFrameInfo.h
    llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
    llvm/trunk/lib/Target/ARM/ARMFrameInfo.h

Modified: llvm/trunk/include/llvm/Target/TargetFrameInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetFrameInfo.h?rev=82767&r1=82766&r2=82767&view=diff

==============================================================================
--- llvm/trunk/include/llvm/Target/TargetFrameInfo.h (original)
+++ llvm/trunk/include/llvm/Target/TargetFrameInfo.h Fri Sep 25 09:41:49 2009
@@ -34,10 +34,13 @@
 private:
   StackDirection StackDir;
   unsigned StackAlignment;
+  unsigned TransientStackAlignment;
   int LocalAreaOffset;
 public:
-  TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO)
-    : StackDir(D), StackAlignment(StackAl), LocalAreaOffset(LAO) {}
+  TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO,
+                  unsigned TransAl = 1)
+    : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
+      LocalAreaOffset(LAO) {}
 
   virtual ~TargetFrameInfo();
 
@@ -48,12 +51,20 @@
   ///
   StackDirection getStackGrowthDirection() const { return StackDir; }
 
-  /// getStackAlignment - This method returns the number of bytes that the stack
-  /// pointer must be aligned to.  Typically, this is the largest alignment for
-  /// any data object in the target.
+  /// getStackAlignment - This method returns the number of bytes to which the
+  /// stack pointer must be aligned on entry to a function.  Typically, this
+  /// is the largest alignment for any data object in the target.
   ///
   unsigned getStackAlignment() const { return StackAlignment; }
 
+  /// getTransientStackAlignment - This method returns the number of bytes to
+  /// which the stack pointer must be aligned at all times, even between
+  /// calls.
+  ///
+  unsigned getTransientStackAlignment() const {
+    return TransientStackAlignment;
+  }
+
   /// getOffsetOfLocalArea - This method returns the offset of the local area
   /// from the stack pointer on entrance to a function.
   ///

Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=82767&r1=82766&r2=82767&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original)
+++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Fri Sep 25 09:41:49 2009
@@ -484,7 +484,7 @@
   // Loop over all of the stack objects, assigning sequential addresses...
   MachineFrameInfo *FFI = Fn.getFrameInfo();
 
-  unsigned MaxAlign = FFI->getMaxAlignment();
+  unsigned MaxAlign = 1;
 
   // Start at the beginning of the local area.
   // The Offset is the distance from the stack top in the direction
@@ -586,23 +586,28 @@
       AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
   }
 
-  // Round up the size to a multiple of the alignment, but only if there are
-  // calls or alloca's in the function.  This ensures that any calls to
-  // subroutines have their stack frames suitably aligned.
-  // Also do this if we need runtime alignment of the stack.  In this case
-  // offsets will be relative to SP not FP; round up the stack size so this
-  // works.
-  if (!RegInfo->targetHandlesStackFrameRounding() &&
-      (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
-       (RegInfo->needsStackRealignment(Fn) &&
-        FFI->getObjectIndexEnd() != 0))) {
+  if (!RegInfo->targetHandlesStackFrameRounding()) {
     // If we have reserved argument space for call sites in the function
     // immediately on entry to the current function, count it as part of the
     // overall stack size.
-    if (RegInfo->hasReservedCallFrame(Fn))
+    if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
       Offset += FFI->getMaxCallFrameSize();
 
-    unsigned AlignMask = std::max(TFI.getStackAlignment(), MaxAlign) - 1;
+    // Round up the size to a multiple of the alignment.  If the function has
+    // any calls or alloca's, align to the target's StackAlignment value to
+    // ensure that the callee's frame or the alloca data is suitably aligned;
+    // otherwise, for leaf functions, align to the TransientStackAlignment
+    // value.
+    unsigned StackAlign;
+    if (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
+        (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0))
+      StackAlign = TFI.getStackAlignment();
+    else
+      StackAlign = TFI.getTransientStackAlignment();
+    // If the frame pointer is eliminated, all frame offsets will be relative
+    // to SP not FP; align to MaxAlign so this works.
+    StackAlign = std::max(StackAlign, MaxAlign);
+    unsigned AlignMask = StackAlign - 1;
     Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
   }
 
@@ -611,7 +616,8 @@
 
   // Remember the required stack alignment in case targets need it to perform
   // dynamic stack alignment.
-  FFI->setMaxAlignment(MaxAlign);
+  if (MaxAlign > FFI->getMaxAlignment())
+    FFI->setMaxAlignment(MaxAlign);
 }
 
 

Modified: llvm/trunk/lib/Target/ARM/ARMFrameInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFrameInfo.h?rev=82767&r1=82766&r2=82767&view=diff

==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFrameInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMFrameInfo.h Fri Sep 25 09:41:49 2009
@@ -23,7 +23,7 @@
 class ARMFrameInfo : public TargetFrameInfo {
 public:
   explicit ARMFrameInfo(const ARMSubtarget &ST)
-    : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
+    : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0, 4) {
   }
 };
 

Added: llvm/trunk/test/CodeGen/ARM/2009-09-24-spill-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2009-09-24-spill-align.ll?rev=82767&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2009-09-24-spill-align.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2009-09-24-spill-align.ll Fri Sep 25 09:41:49 2009
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; pr4926
+
+define arm_apcscc void @test_vget_lanep16() nounwind {
+entry:
+  %arg0_poly16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
+  %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+; CHECK: fldd
+  %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
+  %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
+  store i16 %1, i16* %out_poly16_t, align 2
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}





More information about the llvm-commits mailing list