[PATCH] D12337: [Codegen] Ensure stack is properly aligned for call argument initialization
Jeroen Ketema via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 25 14:39:56 PDT 2015
jketema created this revision.
jketema added a reviewer: rnk.
jketema added a subscriber: llvm-commits.
Herald added a subscriber: qcolombet.
Arguments spilled on the stack before a function call may have
alignment requirements, for example in the case of vectors.
These requirements are exploited by the code generator by using
move instructions that have similar alignment requirements, e.g.,
movaps on x86.
Although the code generator properly aligns the arguments with
respect to the displacement of the stack pointer it computes,
the displacement itself may cause misalignment. For example, if
we have:
%3 = load <16 x float>, <16 x float>* %1, align 64
call void @bar(<16 x float> %3, i32 0)
The x86 back-end emits:
movaps 32(%ecx), %xmm2
movaps (%ecx), %xmm0
movaps 16(%ecx), %xmm1
movaps 48(%ecx), %xmm3
subl $20, %esp <-- if %esp was 16-byte aligned before this instruction, it no longer will be afterwards
movaps %xmm3, (%esp) <-- movaps requires 16-byte alignment, while %esp is not aligned as such.
movl $0, 16(%esp)
calll __bar
To solve this, we need to make sure that the computed value with which
the stack pointer is changed is a multiple of the maximal alignment seen
during its computation. With this change we get proper alignment:
subl $32, %esp
movaps %xmm3, (%esp)
http://reviews.llvm.org/D12337
Files:
include/llvm/CodeGen/CallingConvLower.h
lib/CodeGen/CallingConvLower.cpp
test/CodeGen/X86/aligned-variadic.ll
test/CodeGen/X86/win32-spill-xmm.ll
Index: test/CodeGen/X86/win32-spill-xmm.ll
===================================================================
--- test/CodeGen/X86/win32-spill-xmm.ll
+++ test/CodeGen/X86/win32-spill-xmm.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
+; CHECK: subl $32, %esp
+; CHECK: movaps %xmm3, (%esp)
+; CHECK: movl $0, 16(%esp)
+
+declare void @bar(<16 x float> %a, i32 %b) nounwind
+
+define void @foo(i32, <16 x float> * nocapture readonly) nounwind {
+entry:
+ %2 = alloca i32, i32 %0
+ %3 = load <16 x float>, <16 x float> * %1, align 64
+ tail call void @bar(<16 x float> %3, i32 0) nounwind
+ ret void
+}
Index: test/CodeGen/X86/aligned-variadic.ll
===================================================================
--- test/CodeGen/X86/aligned-variadic.ll
+++ test/CodeGen/X86/aligned-variadic.ll
@@ -15,7 +15,7 @@
%overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8
%overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i64 24
store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
-; X32: leal 68(%esp), [[REG:%.*]]
+; X32: leal 72(%esp), [[REG:%.*]]
; X32: movl [[REG]], 16(%esp)
; X64: leaq 232(%rsp), [[REG:%.*]]
; X64: movq [[REG]], 184(%rsp)
Index: lib/CodeGen/CallingConvLower.cpp
===================================================================
--- lib/CodeGen/CallingConvLower.cpp
+++ lib/CodeGen/CallingConvLower.cpp
@@ -32,6 +32,7 @@
CallOrPrologue(Unknown) {
// No stack is used.
StackOffset = 0;
+ MinStackAlign = 1;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -192,6 +193,7 @@
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
unsigned SavedStackOffset = StackOffset;
+ unsigned SavedMinStackAlign = MinStackAlign;
unsigned NumLocs = Locs.size();
// Set the 'inreg' flag if it is used for this calling convention.
@@ -223,6 +225,7 @@
// as allocated so that future queries don't return the same registers, i.e.
// when i64 and f64 are both passed in GPRs.
StackOffset = SavedStackOffset;
+ MinStackAlign = SavedMinStackAlign;
Locs.resize(NumLocs);
}
Index: include/llvm/CodeGen/CallingConvLower.h
===================================================================
--- include/llvm/CodeGen/CallingConvLower.h
+++ include/llvm/CodeGen/CallingConvLower.h
@@ -201,6 +201,7 @@
LLVMContext &Context;
unsigned StackOffset;
+ unsigned MinStackAlign;
SmallVector<uint32_t, 16> UsedRegs;
SmallVector<CCValAssign, 4> PendingLocs;
@@ -270,7 +271,9 @@
CallingConv::ID getCallingConv() const { return CallingConv; }
bool isVarArg() const { return IsVarArg; }
- unsigned getNextStackOffset() const { return StackOffset; }
+ unsigned getNextStackOffset() const {
+ return ((StackOffset + MinStackAlign - 1) & ~(MinStackAlign - 1));
+ }
/// isAllocated - Return true if the specified register (or an alias) is
/// allocated.
@@ -403,6 +406,7 @@
StackOffset = ((StackOffset + Align - 1) & ~(Align - 1));
unsigned Result = StackOffset;
StackOffset += Size;
+ MinStackAlign = Align > MinStackAlign ? Align : MinStackAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
return Result;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D12337.33126.patch
Type: text/x-patch
Size: 3365 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150825/b8ba2c51/attachment.bin>
More information about the llvm-commits
mailing list