[llvm] ab55cc6 - [X86] pr51000 in-register struct return tailcalling

Nathan Sidwell via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 25 10:15:57 PDT 2021


Author: Nathan Sidwell
Date: 2021-08-25T10:15:50-07:00
New Revision: ab55cc6cef270c565aa31b517533432b05903389

URL: https://github.com/llvm/llvm-project/commit/ab55cc6cef270c565aa31b517533432b05903389
DIFF: https://github.com/llvm/llvm-project/commit/ab55cc6cef270c565aa31b517533432b05903389.diff

LOG: [X86] pr51000 in-register struct return tailcalling

In-register structure returns are not special, and handled by lowering
to multiple-value tuples.  We can tail-call from non-sret fns to
structure-returning functions, except on i686 where the sret pointer
is callee-pop.

Differential Revision: https://reviews.llvm.org/D105807

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/X86/sibcall.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e691096692485..d11099ce2f633 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4146,10 +4146,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   if (isTailCall && !IsMustTail) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, SR != NotStructReturn,
-                    MF.getFunction().hasStructRetAttr(), CLI.RetTy,
-                    Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, SR == StackStructReturn, isVarArg, CLI.RetTy, Outs,
+        OutVals, Ins, DAG);
 
     // Sibcalls are automatically detected tailcalls which do not require
     // ABI changes.
@@ -4824,9 +4823,8 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
 /// Check whether the call is eligible for tail call optimization. Targets
 /// that want to do tail call optimization should implement this function.
 bool X86TargetLowering::IsEligibleForTailCallOptimization(
-    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-    bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
-    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+    bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
     const SmallVectorImpl<SDValue> &OutVals,
     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
   if (!mayTailCallThisCC(CalleeCC))
@@ -4870,9 +4868,17 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
   if (RegInfo->hasStackRealignment(MF))
     return false;
 
-  // Also avoid sibcall optimization if either caller or callee uses struct
-  // return semantics.
-  if (isCalleeStructRet || isCallerStructRet)
+  // Also avoid sibcall optimization if we're an sret return fn and the callee
+  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
+  // insufficient.
+  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+    // For a compatible tail call the callee must return our sret pointer. So it
+    // needs to be (a) an sret function itself and (b) we pass our sret as its
+    // sret. Condition #b is harder to determine.
+    return false;
+  } else if (Subtarget.is32Bit() && IsCalleeStackStructRet)
+    // In the i686 ABI, the sret pointer is callee-pop, so we cannot tail-call,
+    // as our caller doesn't expect that.
     return false;
 
   // Do not sibcall optimize vararg calls unless all arguments are passed via

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 1d4bcc7756645..be2d5db64b04a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1477,16 +1477,11 @@ namespace llvm {
 
     /// Check whether the call is eligible for tail call optimization. Targets
     /// that want to do tail call optimization should implement this function.
-    bool IsEligibleForTailCallOptimization(SDValue Callee,
-                                           CallingConv::ID CalleeCC,
-                                           bool isVarArg,
-                                           bool isCalleeStructRet,
-                                           bool isCallerStructRet,
-                                           Type *RetTy,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                           SelectionDAG& DAG) const;
+    bool IsEligibleForTailCallOptimization(
+        SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+        bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<SDValue> &OutVals,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                     SDValue Chain, bool IsTailCall,
                                     bool Is64Bit, int FPDiff,

diff  --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll
index 1859efb98f346..cc2bf308ccee5 100644
--- a/llvm/test/CodeGen/X86/sibcall.ll
+++ b/llvm/test/CodeGen/X86/sibcall.ll
@@ -657,47 +657,6 @@ define fastcc void @t21_sret_to_sret(%struct.foo* noalias sret(%struct.foo) %agg
   ret void
 }
 
-define fastcc void @t21_sret_to_sret_alloca(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind  {
-; X86-LABEL: t21_sret_to_sret_alloca:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $24, %esp
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    calll t21_f_sret
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    addl $24, %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
-;
-; X64-LABEL: t21_sret_to_sret_alloca:
-; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $16, %rsp
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    movq %rsp, %rdi
-; X64-NEXT:    callq t21_f_sret
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    addq $16, %rsp
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
-;
-; X32-LABEL: t21_sret_to_sret_alloca:
-; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    subl $16, %esp
-; X32-NEXT:    movq %rdi, %rbx
-; X32-NEXT:    movl %esp, %edi
-; X32-NEXT:    callq t21_f_sret
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    addl $16, %esp
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
-  %a = alloca %struct.foo, align 8
-  tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %a) nounwind
-  ret void
-}
-
 define fastcc void @t21_sret_to_sret_more_args(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind  {
 ; X86-LABEL: t21_sret_to_sret_more_args:
 ; X86:       # %bb.0:
@@ -1016,8 +975,8 @@ define fastcc void @t21_sret_to_non_sret(%struct.foo* noalias sret(%struct.foo)
   ret void
 }
 
-
 define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind  {
+; i686 not tailcallable, as sret is callee-pop here.
 ; X86-LABEL: t22_non_sret_to_sret:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $12, %esp
@@ -1029,17 +988,11 @@ define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind  {
 ;
 ; X64-LABEL: t22_non_sret_to_sret:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    callq t22_f_sret at PLT
-; X64-NEXT:    popq %rax
-; X64-NEXT:    retq
+; X64-NEXT:    jmp	t22_f_sret at PLT                  # TAILCALL
 ;
 ; X32-LABEL: t22_non_sret_to_sret:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rax
-; X32-NEXT:    callq t22_f_sret at PLT
-; X32-NEXT:    popq %rax
-; X32-NEXT:    retq
+; X32-NEXT:    jmp	t22_f_sret at PLT                  # TAILCALL
   tail call ccc void @t22_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind
   ret void
 }


        


More information about the llvm-commits mailing list