[llvm] 00a648a - [X86] Elect to tail call when `sret` ptr is passed to the callee

Antonio Frighetto via llvm-commits llvm-commits@lists.llvm.org
Tue Aug 5 00:51:27 PDT 2025


Author: Antonio Frighetto
Date: 2025-08-05T09:50:52+02:00
New Revision: 00a648ab110583ed45cc931a0c1aabf4bfcd17e3

URL: https://github.com/llvm/llvm-project/commit/00a648ab110583ed45cc931a0c1aabf4bfcd17e3
DIFF: https://github.com/llvm/llvm-project/commit/00a648ab110583ed45cc931a0c1aabf4bfcd17e3.diff

LOG: [X86] Elect to tail call when `sret` ptr is passed to the callee

We can allow the callee to be tail-called when the caller receives a
`sret` pointer argument, as long as this pointer is forwarded to the
callee as its own `sret` argument.

Fixes: https://github.com/llvm/llvm-project/issues/146303.
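For reference, this is the shape of IR that now lowers to a sibcall. The
snippet mirrors the t15 test updated below; the body of %struct.foo is
illustrative, as the type definition is not part of this diff:

    %struct.foo = type { [4 x i32] }

    declare dso_local fastcc void @f(ptr noalias sret(%struct.foo))

    ; The caller forwards its own sret pointer as the callee's sret
    ; argument, so the call becomes `jmp f` instead of `callq f`
    ; followed by copying the pointer back into %rax/%eax.
    define dso_local void @t15(ptr noalias sret(%struct.foo) %agg.result) nounwind {
      tail call fastcc void @f(ptr noalias sret(%struct.foo) %agg.result) nounwind
      ret void
    }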

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLoweringCall.cpp
    llvm/test/CodeGen/X86/sibcall.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 5862c7e5bb231..7c594d04f3c23 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -2781,6 +2781,38 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   return Bytes == MFI.getObjectSize(FI);
 }
 
+static bool
+mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
+                            Register CallerSRetReg) {
+  const auto &Outs = CLI.Outs;
+  const auto &OutVals = CLI.OutVals;
+
+  // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
+  // operand index within the callee that may have a sret pointer too.
+  unsigned Pos = 0;
+  for (unsigned E = Outs.size(); Pos != E; ++Pos)
+    if (Outs[Pos].Flags.isSRet())
+      break;
+  // Bail out if the callee does not have any sret argument.
+  if (Pos == Outs.size())
+    return false;
+
+  // At this point, either the caller is forwarding its sret argument to the
+  // callee, or the callee is being passed a different sret pointer. We now look
+  // for a CopyToReg, where the callee sret argument is written into a new vreg
+  // (which should later be %rax/%eax, if this is returned).
+  SDValue SRetArgVal = OutVals[Pos];
+  for (SDNode *User : SRetArgVal->users()) {
+    if (User->getOpcode() != ISD::CopyToReg)
+      continue;
+    Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+    if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
+      return true;
+  }
+
+  return false;
+}
+
 /// Check whether the call is eligible for tail call optimization. Targets
 /// that want to do tail call optimization should implement this function.
 /// Note that the x86 backend does not check musttail calls for eligibility! The
@@ -2802,6 +2834,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
 
   // If -tailcallopt is specified, make fastcc functions tail-callable.
   MachineFunction &MF = DAG.getMachineFunction();
+  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
   const Function &CallerF = MF.getFunction();
 
   // If the function return type is x86_fp80 and the callee return type is not,
@@ -2838,14 +2871,15 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
   if (RegInfo->hasStackRealignment(MF))
     return false;
 
-  // Also avoid sibcall optimization if we're an sret return fn and the callee
-  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
-  // insufficient.
-  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+  // Avoid sibcall optimization if we are an sret return function and the callee
+  // is incompatible, unless we can prove the sret pointer is forwarded to the
+  // callee. See comment in LowerReturn about why hasStructRetAttr is insufficient.
+  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
     // For a compatible tail call the callee must return our sret pointer. So it
     // needs to be (a) an sret function itself and (b) we pass our sret as its
     // sret. Condition #b is harder to determine.
-    return false;
+    if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
+      return false;
   } else if (IsCalleePopSRet)
     // The callee pops an sret, so we cannot tail-call, as our caller doesn't
     // expect that.
@@ -2967,8 +3001,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
       X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                        MF.getTarget().Options.GuaranteedTailCallOpt);
 
-  if (unsigned BytesToPop =
-          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
+  if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
     // If we have bytes to pop, the callee must pop them.
     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
     if (!CalleePopMatches)

diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll
index e36b9b895df23..2759a9883975e 100644
--- a/llvm/test/CodeGen/X86/sibcall.ll
+++ b/llvm/test/CodeGen/X86/sibcall.ll
@@ -444,21 +444,11 @@ define dso_local void @t15(ptr noalias sret(%struct.foo) %agg.result) nounwind
 ;
 ; X64-LABEL: t15:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    callq f
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
+; X64-NEXT:    jmp f # TAILCALL
 ;
 ; X32-LABEL: t15:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movq %rdi, %rbx
-; X32-NEXT:    callq f
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
+; X32-NEXT:    jmp f # TAILCALL
   tail call fastcc void @f(ptr noalias sret(%struct.foo) %agg.result) nounwind
   ret void
 }
@@ -607,32 +597,15 @@ declare dso_local fastcc double @foo20(double) nounwind
 define fastcc void @t21_sret_to_sret(ptr noalias sret(%struct.foo) %agg.result) nounwind  {
 ; X86-LABEL: t21_sret_to_sret:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    calll t21_f_sret
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    addl $8, %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-NEXT:    jmp t21_f_sret # TAILCALL
 ;
 ; X64-LABEL: t21_sret_to_sret:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    callq t21_f_sret
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
+; X64-NEXT:    jmp t21_f_sret # TAILCALL
 ;
 ; X32-LABEL: t21_sret_to_sret:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movq %rdi, %rbx
-; X32-NEXT:    callq t21_f_sret
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
+; X32-NEXT:    jmp t21_f_sret # TAILCALL
   tail call fastcc void @t21_f_sret(ptr noalias sret(%struct.foo) %agg.result) nounwind
   ret void
 }
@@ -640,34 +613,15 @@ define fastcc void @t21_sret_to_sret(ptr noalias sret(%struct.foo) %agg.result)
 define fastcc void @t21_sret_to_sret_more_args(ptr noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind  {
 ; X86-LABEL: t21_sret_to_sret_more_args:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, (%esp)
-; X86-NEXT:    calll f_sret@PLT
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    addl $8, %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-NEXT:    jmp f_sret@PLT # TAILCALL
 ;
 ; X64-LABEL: t21_sret_to_sret_more_args:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    callq f_sret@PLT
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
+; X64-NEXT:    jmp f_sret@PLT # TAILCALL
 ;
 ; X32-LABEL: t21_sret_to_sret_more_args:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movq %rdi, %rbx
-; X32-NEXT:    callq f_sret@PLT
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
+; X32-NEXT:    jmp f_sret@PLT # TAILCALL
   tail call fastcc void @f_sret(ptr noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind
   ret void
 }
@@ -675,35 +629,18 @@ define fastcc void @t21_sret_to_sret_more_args(ptr noalias sret(%struct.foo) %ag
 define fastcc void @t21_sret_to_sret_second_arg_sret(ptr noalias %agg.result, ptr noalias sret(%struct.foo) %ret) nounwind  {
 ; X86-LABEL: t21_sret_to_sret_second_arg_sret:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movl %edx, %esi
 ; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    calll t21_f_sret
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    addl $8, %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-NEXT:    jmp t21_f_sret # TAILCALL
 ;
 ; X64-LABEL: t21_sret_to_sret_second_arg_sret:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %rsi, %rbx
 ; X64-NEXT:    movq %rsi, %rdi
-; X64-NEXT:    callq t21_f_sret
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
+; X64-NEXT:    jmp t21_f_sret # TAILCALL
 ;
 ; X32-LABEL: t21_sret_to_sret_second_arg_sret:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movq %rsi, %rbx
 ; X32-NEXT:    movq %rsi, %rdi
-; X32-NEXT:    callq t21_f_sret
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
+; X32-NEXT:    jmp t21_f_sret # TAILCALL
   tail call fastcc void @t21_f_sret(ptr noalias sret(%struct.foo) %ret) nounwind
   ret void
 }
@@ -725,27 +662,17 @@ define fastcc void @t21_sret_to_sret_more_args2(ptr noalias sret(%struct.foo) %a
 ;
 ; X64-LABEL: t21_sret_to_sret_more_args2:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    movq %rdi, %rbx
 ; X64-NEXT:    movl %edx, %esi
 ; X64-NEXT:    movl %eax, %edx
-; X64-NEXT:    callq f_sret@PLT
-; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    retq
+; X64-NEXT:    jmp f_sret@PLT # TAILCALL
 ;
 ; X32-LABEL: t21_sret_to_sret_more_args2:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    movq %rdi, %rbx
 ; X32-NEXT:    movl %edx, %esi
 ; X32-NEXT:    movl %eax, %edx
-; X32-NEXT:    callq f_sret@PLT
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    retq
+; X32-NEXT:    jmp f_sret@PLT # TAILCALL
   tail call fastcc void @f_sret(ptr noalias sret(%struct.foo) %agg.result, i32 %b, i32 %a) nounwind
   ret void
 }

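For contrast, here is a case the new check still rejects (hypothetical,
adapted from the t21 tests above): when the callee's sret operand is not
the caller's incoming sret pointer, the caller must still place its own
pointer in %rax/%eax after the call, so no tail call is emitted.

    ; %other, not the caller's own sret pointer, is passed as the
    ; callee's sret, so mayBeSRetTailCallCompatible returns false and
    ; the call stays a plain call.
    define fastcc void @no_sibcall(ptr noalias sret(%struct.foo) %agg.result, ptr noalias %other) nounwind {
      tail call fastcc void @t21_f_sret(ptr noalias sret(%struct.foo) %other) nounwind
      ret void
    }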