[PATCH] [Codegen] Ensure stack is properly aligned for call argument initialization

Jeroen Ketema via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 11 02:58:23 PDT 2015


Ping.

Jeroen

On 05/08/2015 11:07, Jeroen Ketema wrote:
> Arguments spilled on the stack before a function call may have
> alignment requirements, for example in the case of vectors.
> These requirements are exploited by the code generator by using
> move instructions that have similar alignment requirements, e.g.,
> movaps on x86.
>
> Although the code generator properly aligns the arguments with
> respect to the displacement of the stack pointer it computes,
> the displacement itself may cause misalignment. For example if
> we have
>
>   %3 = load <16 x float>, <16 x float>* %1, align 64
>   call void @bar(<16 x float> %3, i32 0)
>
> The x86 back-end emits:
>
>   movaps  32(%ecx), %xmm2
>   movaps  (%ecx), %xmm0
>   movaps  16(%ecx), %xmm1
>   movaps  48(%ecx), %xmm3
>   subl    $20, %esp       <-- if %esp was 16-byte aligned before this
>                               instruction, it no longer will be afterwards
>   movaps  %xmm3, (%esp)   <-- movaps requires 16-byte alignment, while
>                               %esp is not aligned as such.
>   movl    $0, 16(%esp)
>   calll   __bar
>
> To solve this, we need to make sure that the computed value with which
> the stack pointer is changed is a multiple of the maximal alignment seen
> during its computation. With this change we get proper alignment:
>
>   subl    $32, %esp
>   movaps  %xmm3, (%esp)

-- 
Jeroen Ketema
Senior Software Engineer, Compilers
Codeplay Software Ltd.
45 York Place, Edinburgh, EH1 3HP
Tel: 0131 466 0503
Fax: 0131 557 6600
Website: http://www.codeplay.com
Twitter: https://twitter.com/codeplaysoft


-------------- next part --------------
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 1fd4eeb..79f83ba 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -201,6 +201,7 @@ private:
   LLVMContext &Context;
 
   unsigned StackOffset;
+  unsigned MinStackAlign;
   SmallVector<uint32_t, 16> UsedRegs;
   SmallVector<CCValAssign, 4> PendingLocs;
 
@@ -270,7 +271,9 @@ public:
   CallingConv::ID getCallingConv() const { return CallingConv; }
   bool isVarArg() const { return IsVarArg; }
 
-  unsigned getNextStackOffset() const { return StackOffset; }
+  unsigned getNextStackOffset() const {
+    return ((StackOffset + MinStackAlign - 1) & ~(MinStackAlign - 1));
+  }
 
   /// isAllocated - Return true if the specified register (or an alias) is
   /// allocated.
@@ -403,6 +406,7 @@ public:
     StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
     unsigned Result = StackOffset;
     StackOffset += Size;
+    MinStackAlign = Align > MinStackAlign ? Align : MinStackAlign;
     MF.getFrameInfo()->ensureMaxAlignment(Align);
     return Result;
   }
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index fb29b1d..559e076 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
       CallOrPrologue(Unknown) {
   // No stack is used.
   StackOffset = 0;
+  MinStackAlign = 1;
 
   clearByValRegsInfo();
   UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
 void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
                                           MVT VT, CCAssignFn Fn) {
   unsigned SavedStackOffset = StackOffset;
+  unsigned SavedMinStackAlign = MinStackAlign;
   unsigned NumLocs = Locs.size();
 
   // Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
   // as allocated so that future queries don't return the same registers, i.e.
   // when i64 and f64 are both passed in GPRs.
   StackOffset = SavedStackOffset;
+  MinStackAlign = SavedMinStackAlign;
   Locs.resize(NumLocs);
 }
 
diff --git a/test/CodeGen/X86/aligned-variadic.ll b/test/CodeGen/X86/aligned-variadic.ll
index 2941592..2eb8a58 100644
--- a/test/CodeGen/X86/aligned-variadic.ll
+++ b/test/CodeGen/X86/aligned-variadic.ll
@@ -15,7 +15,7 @@ entry:
   %overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8
   %overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i64 24
   store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
-; X32: leal    68(%esp), [[REG:%.*]]
+; X32: leal    72(%esp), [[REG:%.*]]
 ; X32: movl    [[REG]], 16(%esp)
 ; X64: leaq    232(%rsp), [[REG:%.*]]
 ; X64: movq    [[REG]], 184(%rsp)
diff --git a/test/CodeGen/X86/win32-spill-xmm.ll b/test/CodeGen/X86/win32-spill-xmm.ll
new file mode 100644
index 0000000..80741dd
--- /dev/null
+++ b/test/CodeGen/X86/win32-spill-xmm.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
+; CHECK: subl    $32, %esp
+; CHECK: movaps  %xmm3, (%esp)
+; CHECK: movl    $0, 16(%esp)
+
+declare void @bar(<16 x float> %a, i32 %b) nounwind
+
+define void @foo(i32, <16 x float> * nocapture readonly) nounwind {
+entry:
+  %2 = alloca i32, i32 %0
+  %3 = load <16 x float>, <16 x float> * %1, align 64
+  tail call void @bar(<16 x float> %3, i32 0) nounwind
+  ret void
+}

