[llvm-commits] [llvm] r55881 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Thu Sep 11 02:02:07 PDT 2008

This commit invalidates tail call stack handling.

When tail call optimization is enabled all stack frame sizes need to
be aligned according to the platform alignment. This is so to ensure
that the difference between two stack frame sizes does not interfere
with alignment. When a tail call is performed this difference is
'popped of the stack'.

After this commit call sites and stack enter code no longer agrees if
the call site is not a tail call.

In X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG
&DAG) it correctly says:
> -  if (CC == CallingConv::Fast)
> +  if (PerformTailCallOpt && CC == CallingConv::Fast)
>     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
>

But in  X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) it
only adjusts the stack when really tail calling:
>   // If the function takes variable number of arguments, make a frame index for
> @@ -1485,7 +1483,7 @@
>
>   // Get a count of how many bytes are to be pushed on the stack.
>   unsigned NumBytes = CCInfo.getNextStackOffset();
> -  if (CC == CallingConv::Fast)
> +  if (IsTailCall)
>     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

It should read
if (PerformTailCallOpt && CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

Request for permission to change ;-)

regards arnold

an example that goes wrong:
; RUN: llvm-as < %s | llc  -mtriple=i686-unknown-linux  -tailcallopt  > %t1
; linux has 8byte alignment so the params cause stack size 20 when tailcallopt
; is enabled
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
entry:
	ret i32 %a3
}

define fastcc i32 @tailcaller(i32 %in1, i32 %in2, i32 %in3, i32 %in4) {
entry:
	%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32
%in1, i32 %in2 )		; <i32> [#uses=1]
	ret i32 %tmp11
}

define i32 @main(i32 %argc, i8** %argv) {
  %tmp1 = call fastcc i32 @tailcaller( i32 1, i32 2, i32 3, i32 4 )
  ret i32 %tmp1
}

generated assembler:
tailcallee:
	movl	12(%esp), %eax
	ret	$20

tailcaller:
	subl	$20, %esp
	...
	addl	$20, %esp
	jmp	tailcallee  # TAILCALL

main:
	subl	$20, %esp
	...
	call	tailcaller
	addl	$4, %esp <<<<<  callsite thinks tailcaller pops 16 instead of 20bytes!
	ret

On Sun, Sep 7, 2008 at 11:07 AM, Evan Cheng <evan.cheng at apple.com> wrote:
> Author: evancheng
> Date: Sun Sep  7 04:07:23 2008
> New Revision: 55881
>
> URL: http://llvm.org/viewvc/llvm-project?rev=55881&view=rev
> Log:
> Some code clean up.
>
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=55881&r1=55880&r2=55881&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep  7 04:07:23 2008
> @@ -1088,12 +1088,10 @@
>   if (Subtarget->is64Bit()) {
>     if (Subtarget->isTargetWin64())
>       return CC_X86_Win64_C;
> -    else {
> -      if (CC == CallingConv::Fast && PerformTailCallOpt)
> -        return CC_X86_64_TailCall;
> -      else
> -        return CC_X86_64_C;
> -    }
> +    else if (CC == CallingConv::Fast && PerformTailCallOpt)
> +      return CC_X86_64_TailCall;
> +    else
> +      return CC_X86_64_C;
>   }
>
>   if (CC == CallingConv::X86_FastCall)
> @@ -1294,7 +1292,7 @@
>
>   unsigned StackSize = CCInfo.getNextStackOffset();
>   // align stack specially for tail calls
> -  if (CC == CallingConv::Fast)
> +  if (PerformTailCallOpt && CC == CallingConv::Fast)
>     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
>
>   // If the function takes variable number of arguments, make a frame index for
> @@ -1485,7 +1483,7 @@
>
>   // Get a count of how many bytes are to be pushed on the stack.
>   unsigned NumBytes = CCInfo.getNextStackOffset();
> -  if (CC == CallingConv::Fast)
> +  if (IsTailCall)
>     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
>
>   int FPDiff = 0;
> @@ -1829,25 +1827,22 @@
>  /// for a 16 byte align requirement.
>  unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
>                                                         SelectionDAG& DAG) {
> -  if (PerformTailCallOpt) {
> -    MachineFunction &MF = DAG.getMachineFunction();
> -    const TargetMachine &TM = MF.getTarget();
> -    const TargetFrameInfo &TFI = *TM.getFrameInfo();
> -    unsigned StackAlignment = TFI.getStackAlignment();
> -    uint64_t AlignMask = StackAlignment - 1;
> -    int64_t Offset = StackSize;
> -    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
> -    if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
> -      // Number smaller than 12 so just add the difference.
> -      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
> -    } else {
> -      // Mask out lower bits, add stackalignment once plus the 12 bytes.
> -      Offset = ((~AlignMask) & Offset) + StackAlignment +
> -        (StackAlignment-SlotSize);
> -    }
> -    StackSize = Offset;
> +  MachineFunction &MF = DAG.getMachineFunction();
> +  const TargetMachine &TM = MF.getTarget();
> +  const TargetFrameInfo &TFI = *TM.getFrameInfo();
> +  unsigned StackAlignment = TFI.getStackAlignment();
> +  uint64_t AlignMask = StackAlignment - 1;
> +  int64_t Offset = StackSize;
> +  unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
> +  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
> +    // Number smaller than 12 so just add the difference.
> +    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
> +  } else {
> +    // Mask out lower bits, add stackalignment once plus the 12 bytes.
> +    Offset = ((~AlignMask) & Offset) + StackAlignment +
> +      (StackAlignment-SlotSize);
>   }
> -  return StackSize;
> +  return Offset;
>  }
>
>  /// IsEligibleForTailCallElimination - Check to see whether the next instruction