[PATCH] D12337: [Codegen] Ensure stack is properly aligned for call argument initialization

Tue Aug 25 14:39:56 PDT 2015

jketema created this revision.
jketema added a reviewer: rnk.
jketema added a subscriber: llvm-commits.
Herald added a subscriber: qcolombet.

Arguments spilled on the stack before a function call may have
alignment requirements, for example in the case of vectors.
These requirements are exploited by the code generator by using
move instructions that have similar alignment requirements, e.g.,
movaps on x86.

Although the code generator properly aligns the arguments with
respect to the displacement of the stack pointer it computes,
the displacement itself may cause misalignment. For example, if
we have

  %3 = load <16 x float>, <16 x float>* %1, align 64
  call void @bar(<16 x float> %3, i32 0)

The x86 back-end emits:

  movaps  32(%ecx), %xmm2
  movaps  (%ecx), %xmm0
  movaps  16(%ecx), %xmm1
  movaps  48(%ecx), %xmm3
  subl    $20, %esp       <-- if %esp was 16-byte aligned before this instruction, it will no longer be afterwards
  movaps  %xmm3, (%esp)   <-- movaps requires 16-byte alignment, but %esp is no longer aligned as such.
  movl    $0, 16(%esp)
  calll   __bar

To solve this, we need to make sure that the computed value by which
the stack pointer is adjusted is a multiple of the maximal alignment seen
during its computation. With this change we get proper alignment:

  subl    $32, %esp
  movaps  %xmm3, (%esp)

http://reviews.llvm.org/D12337

Files:
  include/llvm/CodeGen/CallingConvLower.h
  lib/CodeGen/CallingConvLower.cpp
  test/CodeGen/X86/aligned-variadic.ll
  test/CodeGen/X86/win32-spill-xmm.ll

Index: test/CodeGen/X86/win32-spill-xmm.ll
===================================================================

--- test/CodeGen/X86/win32-spill-xmm.ll
+++ test/CodeGen/X86/win32-spill-xmm.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
+; CHECK: subl    $32, %esp
+; CHECK: movaps  %xmm3, (%esp)
+; CHECK: movl    $0, 16(%esp)
+
+declare void @bar(<16 x float> %a, i32 %b) nounwind
+
+define void @foo(i32, <16 x float> * nocapture readonly) nounwind {
+entry:
+  %2 = alloca i32, i32 %0
+  %3 = load <16 x float>, <16 x float> * %1, align 64
+  tail call void @bar(<16 x float> %3, i32 0) nounwind
+  ret void
+}
Index: test/CodeGen/X86/aligned-variadic.ll
===================================================================
--- test/CodeGen/X86/aligned-variadic.ll
+++ test/CodeGen/X86/aligned-variadic.ll
@@ -15,7 +15,7 @@
   %overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8
   %overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i64 24
   store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
-; X32: leal    68(%esp), [[REG:%.*]]
+; X32: leal    72(%esp), [[REG:%.*]]
 ; X32: movl    [[REG]], 16(%esp)
 ; X64: leaq    232(%rsp), [[REG:%.*]]
 ; X64: movq    [[REG]], 184(%rsp)
Index: lib/CodeGen/CallingConvLower.cpp
===================================================================
--- lib/CodeGen/CallingConvLower.cpp
+++ lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@
       CallOrPrologue(Unknown) {
   // No stack is used.
   StackOffset = 0;
+  MinStackAlign = 1;
 
   clearByValRegsInfo();
   UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@
 void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
                                           MVT VT, CCAssignFn Fn) {
   unsigned SavedStackOffset = StackOffset;
+  unsigned SavedMinStackAlign = MinStackAlign;
   unsigned NumLocs = Locs.size();
 
   // Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@
   // as allocated so that future queries don't return the same registers, i.e.
   // when i64 and f64 are both passed in GPRs.
   StackOffset = SavedStackOffset;
+  MinStackAlign = SavedMinStackAlign;
   Locs.resize(NumLocs);
 }
 
Index: include/llvm/CodeGen/CallingConvLower.h
===================================================================
--- include/llvm/CodeGen/CallingConvLower.h
+++ include/llvm/CodeGen/CallingConvLower.h
@@ -201,6 +201,7 @@
   LLVMContext &Context;
 
   unsigned StackOffset;
+  unsigned MinStackAlign;
   SmallVector<uint32_t, 16> UsedRegs;
   SmallVector<CCValAssign, 4> PendingLocs;
 
@@ -270,7 +271,9 @@
   CallingConv::ID getCallingConv() const { return CallingConv; }
   bool isVarArg() const { return IsVarArg; }
 
-  unsigned getNextStackOffset() const { return StackOffset; }
+  unsigned getNextStackOffset() const {
+    return ((StackOffset + MinStackAlign - 1) & ~(MinStackAlign - 1));
+  }
 
   /// isAllocated - Return true if the specified register (or an alias) is
   /// allocated.
@@ -403,6 +406,7 @@
     StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
     unsigned Result = StackOffset;
     StackOffset += Size;
+    MinStackAlign = Align > MinStackAlign ? Align : MinStackAlign;
     MF.getFrameInfo()->ensureMaxAlignment(Align);
     return Result;
   }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D12337.33126.patch
Type: text/x-patch
Size: 3365 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150825/b8ba2c51/attachment.bin>