[llvm] r258442 - Avoid unnecessary stack realignment in musttail thunks with SSE2 enabled

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 21 14:23:22 PST 2016


Author: rnk
Date: Thu Jan 21 16:23:22 2016
New Revision: 258442

URL: http://llvm.org/viewvc/llvm-project?rev=258442&view=rev
Log:
Avoid unnecessary stack realignment in musttail thunks with SSE2 enabled

The X86 musttail implementation finds register parameters to forward by
running the calling convention algorithm until a non-register location
is returned. However, assigning a vector memory location has the side
effect of increasing the function's stack alignment. We shouldn't
increase the stack alignment when we are only looking for register
parameters, so this change conditionalizes it.

Modified:
    llvm/trunk/include/llvm/CodeGen/CallingConvLower.h
    llvm/trunk/lib/CodeGen/CallingConvLower.cpp
    llvm/trunk/test/CodeGen/X86/musttail-varargs.ll

Modified: llvm/trunk/include/llvm/CodeGen/CallingConvLower.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/CallingConvLower.h?rev=258442&r1=258441&r2=258442&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/CallingConvLower.h (original)
+++ llvm/trunk/include/llvm/CodeGen/CallingConvLower.h Thu Jan 21 16:23:22 2016
@@ -195,6 +195,7 @@ class CCState {
 private:
   CallingConv::ID CallingConv;
   bool IsVarArg;
+  bool AnalyzingMustTailForwardedRegs = false;
   MachineFunction &MF;
   const TargetRegisterInfo &TRI;
   SmallVectorImpl<CCValAssign> &Locs;
@@ -416,10 +417,15 @@ public:
     unsigned Result = StackOffset;
     StackOffset += Size;
     MaxStackArgAlign = std::max(Align, MaxStackArgAlign);
-    MF.getFrameInfo()->ensureMaxAlignment(Align);
+    ensureMaxAlignment(Align);
     return Result;
   }
 
+  void ensureMaxAlignment(unsigned Align) {
+    if (!AnalyzingMustTailForwardedRegs)
+      MF.getFrameInfo()->ensureMaxAlignment(Align);
+  }
+
   /// Version of AllocateStack with extra register to be shadowed.
   unsigned AllocateStack(unsigned Size, unsigned Align, unsigned ShadowReg) {
     MarkAllocated(ShadowReg);

Modified: llvm/trunk/lib/CodeGen/CallingConvLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CallingConvLower.cpp?rev=258442&r1=258441&r2=258442&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CallingConvLower.cpp (original)
+++ llvm/trunk/lib/CodeGen/CallingConvLower.cpp Thu Jan 21 16:23:22 2016
@@ -51,7 +51,7 @@ void CCState::HandleByVal(unsigned ValNo
     Size = MinSize;
   if (MinAlign > (int)Align)
     Align = MinAlign;
-  MF.getFrameInfo()->ensureMaxAlignment(Align);
+  ensureMaxAlignment(Align);
   MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
   Size = unsigned(alignTo(Size, MinAlign));
   unsigned Offset = AllocateStack(Size, Align);
@@ -236,6 +236,7 @@ void CCState::analyzeMustTailForwardedRe
   // variadic functions, so we need to assume we're not variadic so that we get
   // all the registers that might be used in a non-variadic call.
   SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+  SaveAndRestore<bool> SavedMustTail(AnalyzingMustTailForwardedRegs, true);
 
   for (MVT RegVT : RegParmTypes) {
     SmallVector<MCPhysReg, 8> RemainingRegs;

Modified: llvm/trunk/test/CodeGen/X86/musttail-varargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/musttail-varargs.ll?rev=258442&r1=258441&r2=258442&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/musttail-varargs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/musttail-varargs.ll Thu Jan 21 16:23:22 2016
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=LINUX-X32
 ; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
 ; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows -mattr=+sse2 | FileCheck %s --check-prefix=X86
 
 ; Test that we actually spill and reload all arguments in the variadic argument
 ; pack. Doing a normal call will clobber all argument registers, and we will
@@ -136,6 +137,8 @@ define void @g_thunk(i8* %fptr_i8, ...)
 ; WINDOWS: jmpq *%rcx # TAILCALL
 
 ; X86-LABEL: _g_thunk:
+; X86-NOT: push %ebp
+; X86-NOT: andl {{.*}}, %esp
 ; X86: jmpl *%eax # TAILCALL
 
 ; Do a simple multi-exit multi-bb test.




More information about the llvm-commits mailing list