[llvm] ab55cc6 - [X86] pr51000 in-register struct return tailcalling
Nathan Sidwell via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 25 10:15:57 PDT 2021
Author: Nathan Sidwell
Date: 2021-08-25T10:15:50-07:00
New Revision: ab55cc6cef270c565aa31b517533432b05903389
URL: https://github.com/llvm/llvm-project/commit/ab55cc6cef270c565aa31b517533432b05903389
DIFF: https://github.com/llvm/llvm-project/commit/ab55cc6cef270c565aa31b517533432b05903389.diff
LOG: [X86] pr51000 in-register struct return tailcalling
In-register structure returns are not special; they are handled by lowering
to multiple-value tuples. We can tail-call from non-sret fns to
structure-returning functions, except on i686 where the sret pointer
is callee-pop.
Differential Revision: https://reviews.llvm.org/D105807
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/sibcall.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e691096692485..d11099ce2f633 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4146,10 +4146,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall && !IsMustTail) {
// Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, SR != NotStructReturn,
- MF.getFunction().hasStructRetAttr(), CLI.RetTy,
- Outs, OutVals, Ins, DAG);
+ isTailCall = IsEligibleForTailCallOptimization(
+ Callee, CallConv, SR == StackStructReturn, isVarArg, CLI.RetTy, Outs,
+ OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -4824,9 +4823,8 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
@@ -4870,9 +4868,17 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (RegInfo->hasStackRealignment(MF))
return false;
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
- if (isCalleeStructRet || isCallerStructRet)
+ // Also avoid sibcall optimization if we're an sret return fn and the callee
+ // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
+ // insufficient.
+ if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+ // For a compatible tail call the callee must return our sret pointer. So it
+ // needs to be (a) an sret function itself and (b) we pass our sret as its
+ // sret. Condition #b is harder to determine.
+ return false;
+ } else if (Subtarget.is32Bit() && IsCalleeStackStructRet)
+ // In the i686 ABI, the sret pointer is callee-pop, so we cannot tail-call,
+ // as our caller doesn't expect that.
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 1d4bcc7756645..be2d5db64b04a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1477,16 +1477,11 @@ namespace llvm {
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
- bool IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall,
bool Is64Bit, int FPDiff,
diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll
index 1859efb98f346..cc2bf308ccee5 100644
--- a/llvm/test/CodeGen/X86/sibcall.ll
+++ b/llvm/test/CodeGen/X86/sibcall.ll
@@ -657,47 +657,6 @@ define fastcc void @t21_sret_to_sret(%struct.foo* noalias sret(%struct.foo) %agg
ret void
}
-define fastcc void @t21_sret_to_sret_alloca(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind {
-; X86-LABEL: t21_sret_to_sret_alloca:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: calll t21_f_sret
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: addl $24, %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-;
-; X64-LABEL: t21_sret_to_sret_alloca:
-; X64: # %bb.0:
-; X64-NEXT: pushq %rbx
-; X64-NEXT: subq $16, %rsp
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: movq %rsp, %rdi
-; X64-NEXT: callq t21_f_sret
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: addq $16, %rsp
-; X64-NEXT: popq %rbx
-; X64-NEXT: retq
-;
-; X32-LABEL: t21_sret_to_sret_alloca:
-; X32: # %bb.0:
-; X32-NEXT: pushq %rbx
-; X32-NEXT: subl $16, %esp
-; X32-NEXT: movq %rdi, %rbx
-; X32-NEXT: movl %esp, %edi
-; X32-NEXT: callq t21_f_sret
-; X32-NEXT: movl %ebx, %eax
-; X32-NEXT: addl $16, %esp
-; X32-NEXT: popq %rbx
-; X32-NEXT: retq
- %a = alloca %struct.foo, align 8
- tail call fastcc void @t21_f_sret(%struct.foo* noalias sret(%struct.foo) %a) nounwind
- ret void
-}
-
define fastcc void @t21_sret_to_sret_more_args(%struct.foo* noalias sret(%struct.foo) %agg.result, i32 %a, i32 %b) nounwind {
; X86-LABEL: t21_sret_to_sret_more_args:
; X86: # %bb.0:
@@ -1016,8 +975,8 @@ define fastcc void @t21_sret_to_non_sret(%struct.foo* noalias sret(%struct.foo)
ret void
}
-
define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind {
+; i686 not tailcallable, as sret is callee-pop here.
; X86-LABEL: t22_non_sret_to_sret:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
@@ -1029,17 +988,11 @@ define ccc void @t22_non_sret_to_sret(%struct.foo* %agg.result) nounwind {
;
; X64-LABEL: t22_non_sret_to_sret:
; X64: # %bb.0:
-; X64-NEXT: pushq %rax
-; X64-NEXT: callq t22_f_sret@PLT
-; X64-NEXT: popq %rax
-; X64-NEXT: retq
+; X64-NEXT: jmp t22_f_sret@PLT # TAILCALL
;
; X32-LABEL: t22_non_sret_to_sret:
; X32: # %bb.0:
-; X32-NEXT: pushq %rax
-; X32-NEXT: callq t22_f_sret@PLT
-; X32-NEXT: popq %rax
-; X32-NEXT: retq
+; X32-NEXT: jmp t22_f_sret@PLT # TAILCALL
tail call ccc void @t22_f_sret(%struct.foo* noalias sret(%struct.foo) %agg.result) nounwind
ret void
}
More information about the llvm-commits
mailing list