[llvm] r265506 - [ppc64] Enable sibling call optimization on ppc64 ELFv1/ELFv2 abi
Chuang-Yu Cheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 5 19:04:39 PDT 2016
Author: cycheng
Date: Tue Apr 5 21:04:38 2016
New Revision: 265506
URL: http://llvm.org/viewvc/llvm-project?rev=265506&view=rev
Log:
[ppc64] Enable sibling call optimization on ppc64 ELFv1/ELFv2 abi
This patch enables sibling call optimization on the ppc64 ELFv1/ELFv2 ABIs and
adds a couple of test cases. The patch also passed an llvm/clang bootstrap
build and spec2006 build/run/result validation.
Original issue: https://llvm.org/bugs/show_bug.cgi?id=25617
Great thanks to Tom (tjablin) for his help; he contributed a lot to this patch.
Thanks to Hal and Kit for their invaluable opinions!
Reviewers: hfinkel kbarton
http://reviews.llvm.org/D16315
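
For context on what the optimization does at the machine level: a call that
cannot be a sibling call is emitted as 'bl callee' followed by a nop (or
TOC-restore) slot and needs a frame in the caller, while a sibling call reuses
the caller's frame and branches directly with 'b callee'. A minimal sketch,
assuming a non-PIC build; the function names are hypothetical and the example
is illustrative, not part of the patch (with the new -disable-ppc-sco option
added below, the call falls back to 'bl callee'):

    // Both functions live in the same module, take identical argument lists,
    // and use the C calling convention, so the call in caller() is eligible
    // for sibling call optimization. noinline keeps the sketch from being
    // trivially inlined away.
    __attribute__((noinline)) int callee(int a, int b) { return a + b; }

    int caller(int a, int b) {
      // Expected PPC64 codegen with this patch: a direct branch 'b callee',
      // with no 'bl callee' + 'nop' pair and no new stack frame in caller.
      return callee(a, b);
    }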
Added:
llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/test/CodeGen/PowerPC/ppc64-calls.ll
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=265506&r1=265505&r2=265506&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Apr 5 21:04:38 2016
@@ -19,6 +19,7 @@
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -36,12 +37,15 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+#define DEBUG_TYPE "ppc-lowering"
+
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -51,6 +55,12 @@ cl::desc("disable setting the node sched
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+static cl::opt<bool> DisableSCO("disable-ppc-sco",
+cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumSiblingCalls, "Number of sibling calls");
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@@ -3842,6 +3852,176 @@ static int CalculateTailCallSPDiff(Selec
return SPDiff;
}
+static bool isFunctionGlobalAddress(SDValue Callee);
+
+static bool
+resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
+ // If !G, Callee can be an external symbol.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G) return false;
+
+ const GlobalValue *GV = G->getGlobal();
+
+ if (GV->isDeclaration()) return false;
+
+ switch(GV->getLinkage()) {
+ default: llvm_unreachable("unknow linkage type");
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ return false;
+
+ // Callee with weak linkage is allowed if it has hidden or protected
+ // visibility
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
+ if (GV->hasDefaultVisibility())
+ return false;
+
+ case GlobalValue::ExternalLinkage:
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ }
+
+ // With '-fPIC', a call to a default-visibility function needs a 'nop' after
+ // the call, regardless of whether that function resides in the same module,
+ // so we treat it as residing in a different module.
+ if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
+ return false;
+
+ return true;
+}
+
+static bool
+needStackSlotPassParameters(const PPCSubtarget &Subtarget,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());
+
+ const unsigned PtrByteSize = 8;
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+ const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
+
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = NumFPRs;
+ unsigned AvailableVRs = NumVRs;
+
+ for (const ISD::OutputArg& Param : Outs) {
+ if (Param.Flags.isNest()) continue;
+
+ if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
+ return true;
+ }
+ return false;
+}
+
+static bool
+hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
+ if (CS->arg_size() != CallerFn->getArgumentList().size())
+ return false;
+
+ ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
+ ImmutableCallSite::arg_iterator CalleeArgEnd = CS->arg_end();
+ Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
+
+ for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
+ const Value* CalleeArg = *CalleeArgIter;
+ const Value* CallerArg = &(*CallerArgIter);
+ if (CalleeArg == CallerArg)
+ continue;
+
+ // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
+ // tail call @callee([4 x i64] undef, [4 x i64] %b)
+ // }
+ // The first argument to the callee is undef and has the same type as the caller's.
+ if (CalleeArg->getType() == CallerArg->getType() &&
+ isa<UndefValue>(CalleeArg))
+ continue;
+
+ return false;
+ }
+
+ return true;
+}
+
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ ImmutableCallSite *CS,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
+
+ if (DisableSCO && !TailCallOpt) return false;
+
+ // Variadic argument functions are not supported.
+ if (isVarArg) return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+
+ // Tail or Sibling call optimization (TCO/SCO) requires that the callee and
+ // caller have the same calling convention.
+ if (CallerCC != CalleeCC) return false;
+
+ // SCO supports only the C and Fast calling conventions.
+ if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
+ return false;
+
+ // Functions containing byval parameters are not supported.
+ if (std::any_of(Ins.begin(), Ins.end(),
+ [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
+ return false;
+
+ // No TCO/SCO on indirect calls because the caller has to restore its TOC.
+ if (!isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ // Check if Callee resides in the same module, because for now the PPC64 SVR4
+ // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
+ // another module.
+ // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
+ if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
+ return false;
+
+ // TCO allows altering the callee's ABI, so we don't have to check further.
+ if (CalleeCC == CallingConv::Fast && TailCallOpt)
+ return true;
+
+ if (DisableSCO) return false;
+
+ // If the callee uses the same argument list as the caller, we can apply SCO
+ // in this case. Otherwise, we need to check whether the callee needs stack
+ // slots for passing arguments.
+ if (!hasSameArgumentList(MF.getFunction(), CS) &&
+ needStackSlotPassParameters(Subtarget, Outs)) {
+ return false;
+ }
+
+ return true;
+}
+
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
@@ -4479,9 +4659,32 @@ PPCTargetLowering::LowerCall(TargetLower
bool IsPatchPoint = CLI.IsPatchPoint;
ImmutableCallSite *CS = CLI.CS;
- if (isTailCall)
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
- Ins, DAG);
+ if (isTailCall) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+ isTailCall =
+ IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
+ isVarArg, Outs, Ins, DAG);
+ else
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+ if (isTailCall) {
+ ++NumTailCalls;
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+ ++NumSiblingCalls;
+
+ assert(isa<GlobalAddressSDNode>(Callee) &&
+ "Callee should be an llvm::Function object.");
+ DEBUG(
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ const unsigned Width = 80 - strlen("TCO caller: ")
+ - strlen(", callee linkage: 0, 0");
+ dbgs() << "TCO caller: "
+ << left_justify(DAG.getMachineFunction().getName(), Width)
+ << ", callee linkage: "
+ << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
+ );
+ }
+ }
if (!isTailCall && CS && CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
@@ -4760,12 +4963,16 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
bool hasNest = false;
+ bool IsSibCall = false;
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
unsigned PtrByteSize = 8;
MachineFunction &MF = DAG.getMachineFunction();
+ if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
+ IsSibCall = true;
+
// Mark this function as potentially containing a function that contains a
// tail call. As a consequence the frame pointer will be used for dynamicalloc
// and restoring the callers stack pointer in this functions epilog. This is
@@ -4885,9 +5092,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
CallConv == CallingConv::Fast)
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
+ int SPDiff = 0;
+
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
- int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+ if (!IsSibCall)
+ SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
// To protect arguments on the stack from being clobbered in a tail call,
// force all the loads to happen before doing any other lowering.
@@ -4896,8 +5106,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
- dl);
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(NumBytes, dl, true), dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -5366,7 +5577,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDVa
InFlag = Chain.getValue(1);
}
- if (isTailCall)
+ if (isTailCall && !IsSibCall)
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
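
A note on why resideInSameModule() above rejects callees that may live in
another module: when a call is routed through a linker-generated stub, the TOC
pointer (r2) is clobbered, and the instruction slot after the 'bl' is rewritten
to restore it; a sibling call 'b' leaves no such slot. A hedged sketch with
hypothetical function names, using the ELFv2 convention for the TOC save slot
(the offset differs under ELFv1):

    // Defined in this translation unit (internal linkage); with this patch the
    // tail call below may be emitted as a plain 'b same_module_fn'.
    __attribute__((noinline)) static void same_module_fn() {}

    // Only declared here, so it may resolve to another module. The call keeps
    // the form 'bl other_module_fn' followed by a slot the linker can turn
    // into 'ld 2, 24(1)' to restore the TOC pointer.
    void other_module_fn();

    void calls_local()  { same_module_fn(); }   // sibling-call candidate
    void calls_extern() { other_module_fn(); }  // not a sibling-call candidate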
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=265506&r1=265505&r2=265506&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Apr 5 21:04:38 2016
@@ -713,6 +713,16 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
+ bool
+ IsEligibleForTailCallOptimization_64SVR4(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ ImmutableCallSite *CS,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
int SPDiff,
SDValue Chain,
Modified: llvm/trunk/test/CodeGen/PowerPC/ppc64-calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-calls.ll?rev=265506&r1=265505&r2=265506&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-calls.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-calls.ll Tue Apr 5 21:04:38 2016
@@ -14,7 +14,8 @@ define weak void @foo_weak() nounwind {
define void @test_direct() nounwind readnone {
; CHECK-LABEL: test_direct:
tail call void @foo() nounwind
-; CHECK: bl foo
+; Because of tail call optimization, this can be a 'b' instruction.
+; CHECK: [[BR:b[l]?]] foo
; CHECK-NOT: nop
ret void
}
Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll?rev=265506&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll Tue Apr 5 21:04:38 2016
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu --enable-shrink-wrap=false | FileCheck %s -check-prefix=CHECK-SCO-ONLY
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu --enable-shrink-wrap=true | FileCheck %s -check-prefix=CHECK-SCO-SHRK
+
+%"class.clang::NamedDecl" = type { i32 }
+declare void @__assert_fail();
+
+define i8 @_ZNK5clang9NamedDecl23getLinkageAndVisibilityEv(
+ %"class.clang::NamedDecl"* %this) {
+entry:
+ %tobool = icmp eq %"class.clang::NamedDecl"* %this, null
+ br i1 %tobool, label %cond.false, label %exit
+
+cond.false:
+ tail call void @__assert_fail()
+ unreachable
+
+exit:
+ %DeclKind = getelementptr inbounds
+ %"class.clang::NamedDecl",
+ %"class.clang::NamedDecl"* %this, i64 0, i32 0
+ %bf.load = load i32, i32* %DeclKind, align 4
+ %call.i = tail call i8 @LVComputationKind(
+ %"class.clang::NamedDecl"* %this,
+ i32 %bf.load)
+ ret i8 %call.i
+
+; CHECK-SCO-SHRK-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
+; CHECK-SCO-SHRK: b LVComputationKind
+; CHECK-SCO-SHRK: #TC_RETURNd8
+; CHECK-SCO-SHRK: stdu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-SHRK: bl __assert_fail
+;
+; CHECK-SCO-ONLY-LABEL: _ZNK5clang9NamedDecl23getLinkageAndVisibilityEv:
+; CHECK-SCO-ONLY: stdu 1, -{{[0-9]+}}(1)
+; CHECK-SCO-ONLY: b LVComputationKind
+; CHECK-SCO-ONLY: #TC_RETURNd8
+; CHECK-SCO-ONLY: bl __assert_fail
+}
+
+define fastcc i8 @LVComputationKind(
+ %"class.clang::NamedDecl"* %D,
+ i32 %computation) {
+ ret i8 0
+}
Added: llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll?rev=265506&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-sibcall.ll Tue Apr 5 21:04:38 2016
@@ -0,0 +1,191 @@
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX
+
+; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because
+; only Power8 (and later) fully support LE.
+
+%S_56 = type { [13 x i32], i32 }
+%S_64 = type { [15 x i32], i32 }
+%S_32 = type { [7 x i32], i32 }
+
+; Function Attrs: noinline nounwind
+define void @callee_56_copy([7 x i64] %a, %S_56* %b) #0 { ret void }
+define void @callee_64_copy([8 x i64] %a, %S_64* %b) #0 { ret void }
+
+; Function Attrs: nounwind
+define void @caller_56_reorder_copy(%S_56* %b, [7 x i64] %a) #1 {
+ tail call void @callee_56_copy([7 x i64] %a, %S_56* %b)
+ ret void
+
+; CHECK-SCO-LABEL: caller_56_reorder_copy:
+; CHECK-SCO-NOT: stdu 1
+; CHECK-SCO: TC_RETURNd8 callee_56_copy
+}
+
+define void @caller_64_reorder_copy(%S_64* %b, [8 x i64] %a) #1 {
+ tail call void @callee_64_copy([8 x i64] %a, %S_64* %b)
+ ret void
+
+; CHECK-SCO-LABEL: caller_64_reorder_copy:
+; CHECK-SCO: bl callee_64_copy
+}
+
+define void @callee_64_64_copy([8 x i64] %a, [8 x i64] %b) #0 { ret void }
+define void @caller_64_64_copy([8 x i64] %a, [8 x i64] %b) #1 {
+ tail call void @callee_64_64_copy([8 x i64] %a, [8 x i64] %b)
+ ret void
+
+; CHECK-SCO-LABEL: caller_64_64_copy:
+; CHECK-SCO: b callee_64_64_copy
+}
+
+define void @caller_64_64_reorder_copy([8 x i64] %a, [8 x i64] %b) #1 {
+ tail call void @callee_64_64_copy([8 x i64] %b, [8 x i64] %a)
+ ret void
+
+; CHECK-SCO-LABEL: caller_64_64_reorder_copy:
+; CHECK-SCO: bl callee_64_64_copy
+}
+
+define void @caller_64_64_undef_copy([8 x i64] %a, [8 x i64] %b) #1 {
+ tail call void @callee_64_64_copy([8 x i64] %a, [8 x i64] undef)
+ ret void
+
+; CHECK-SCO-LABEL: caller_64_64_undef_copy:
+; CHECK-SCO: b callee_64_64_copy
+}
+
+define void @arg8_callee(
+ float %a, i32 signext %b, float %c, i32* %d,
+ i8 zeroext %e, float %f, i32* %g, i32 signext %h)
+{
+ ret void
+}
+
+define void @arg8_caller(float %a, i32 signext %b, i8 zeroext %c, i32* %d) {
+entry:
+ tail call void @arg8_callee(float undef, i32 signext undef, float undef,
+ i32* %d, i8 zeroext undef, float undef,
+ i32* undef, i32 signext undef)
+ ret void
+
+; CHECK-SCO-LABEL: arg8_caller:
+; CHECK-SCO: b arg8_callee
+}
+
+; Struct return test
+
+; Function Attrs: noinline nounwind
+define void @callee_sret_56(%S_56* noalias sret %agg.result) #0 { ret void }
+define void @callee_sret_32(%S_32* noalias sret %agg.result) #0 { ret void }
+
+; Function Attrs: nounwind
+define void @caller_do_something_sret_32(%S_32* noalias sret %agg.result) #1 {
+ %1 = alloca %S_56, align 4
+ %2 = bitcast %S_56* %1 to i8*
+ call void @callee_sret_56(%S_56* nonnull sret %1)
+ tail call void @callee_sret_32(%S_32* sret %agg.result)
+ ret void
+
+; CHECK-SCO-LABEL: caller_do_something_sret_32:
+; CHECK-SCO: stdu 1
+; CHECK-SCO: bl callee_sret_56
+; CHECK-SCO: addi 1
+; CHECK-SCO: TC_RETURNd8 callee_sret_32
+}
+
+define void @caller_local_sret_32(%S_32* %a) #1 {
+ %tmp = alloca %S_32, align 4
+ tail call void @callee_sret_32(%S_32* nonnull sret %tmp)
+ ret void
+
+; CHECK-SCO-LABEL: caller_local_sret_32:
+; CHECK-SCO: bl callee_sret_32
+}
+
+attributes #0 = { noinline nounwind }
+attributes #1 = { nounwind }
+
+; vector <4 x i1> test
+
+define void @callee_v4i1(i8 %a, <4 x i1> %b, <4 x i1> %c) { ret void }
+define void @caller_v4i1_reorder(i8 %a, <4 x i1> %b, <4 x i1> %c) {
+ tail call void @callee_v4i1(i8 %a, <4 x i1> %c, <4 x i1> %b)
+ ret void
+
+; <4 x i1> is 32-byte aligned; if the subtarget doesn't support QPX, we can't
+; place %b and %c in QPX registers, so we can't do SCO on caller_v4i1_reorder.
+
+; CHECK-SCO-LABEL: caller_v4i1_reorder:
+; CHECK-SCO: bl callee_v4i1
+
+; CHECK-SCO-HASQPX-LABEL: caller_v4i1_reorder:
+; CHECK-SCO-HASQPX: b callee_v4i1
+}
+
+define void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) { ret void }
+define void @f128_caller(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b) {
+ tail call void @f128_callee(i32* %ptr, ppc_fp128 %a, ppc_fp128 %b)
+ ret void
+
+; CHECK-SCO-LABEL: f128_caller:
+; CHECK-SCO: b f128_callee
+}
+
+; weak linkage test
+%class.T = type { [2 x i8] }
+
+define weak_odr hidden void @wo_hcallee(%class.T* %this, i8* %c) { ret void }
+define void @wo_hcaller(%class.T* %this, i8* %c) {
+ tail call void @wo_hcallee(%class.T* %this, i8* %c)
+ ret void
+
+; CHECK-SCO-LABEL: wo_hcaller:
+; CHECK-SCO: b wo_hcallee
+}
+
+define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void }
+define void @wo_pcaller(%class.T* %this, i8* %c) {
+ tail call void @wo_pcallee(%class.T* %this, i8* %c)
+ ret void
+
+; CHECK-SCO-LABEL: wo_pcaller:
+; CHECK-SCO: b wo_pcallee
+}
+
+define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void }
+define void @wo_caller(%class.T* %this, i8* %c) {
+ tail call void @wo_callee(%class.T* %this, i8* %c)
+ ret void
+
+; CHECK-SCO-LABEL: wo_caller:
+; CHECK-SCO: bl wo_callee
+}
+
+define weak protected void @w_pcallee(i8* %ptr) { ret void }
+define void @w_pcaller(i8* %ptr) {
+ tail call void @w_pcallee(i8* %ptr)
+ ret void
+
+; CHECK-SCO-LABEL: w_pcaller:
+; CHECK-SCO: b w_pcallee
+}
+
+define weak hidden void @w_hcallee(i8* %ptr) { ret void }
+define void @w_hcaller(i8* %ptr) {
+ tail call void @w_hcallee(i8* %ptr)
+ ret void
+
+; CHECK-SCO-LABEL: w_hcaller:
+; CHECK-SCO: b w_hcallee
+}
+
+define weak void @w_callee(i8* %ptr) { ret void }
+define void @w_caller(i8* %ptr) {
+ tail call void @w_callee(i8* %ptr)
+ ret void
+
+; CHECK-SCO-LABEL: w_caller:
+; CHECK-SCO: bl w_callee
+}