[llvm-commits] [llvm] r127953 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Transforms/Scalar/CodeGenPrepare.cpp test/CodeGen/X86/tailcall-cgp-dup.ll
Evan Cheng
evan.cheng at apple.com
Sat Mar 19 10:17:40 PDT 2011
Author: evancheng
Date: Sat Mar 19 12:17:39 2011
New Revision: 127953
URL: http://llvm.org/viewvc/llvm-project?rev=127953&view=rev
Log:
SimplifyCFG has stopped duplicating returns into predecessors to canonicalize IR
to have single return block (at least getting there) for optimizations. This
is general goodness but it would prevent some tailcall optimizations.
One specific case is code like this:
int f1(void);
int f2(void);
int f3(void);
int f4(void);
int f5(void);
int f6(void);
int foo(int x) {
switch(x) {
case 1: return f1();
case 2: return f2();
case 3: return f3();
case 4: return f4();
case 5: return f5();
case 6: return f6();
}
}
=>
LBB0_2: ## %sw.bb
callq _f1
popq %rbp
ret
LBB0_3: ## %sw.bb1
callq _f2
popq %rbp
ret
LBB0_4: ## %sw.bb3
callq _f3
popq %rbp
ret
This patch teaches codegenprep to duplicate returns when the return value
is a phi and where the phi operands are produced by tail calls followed by
an unconditional branch:
sw.bb7: ; preds = %entry
%call8 = tail call i32 @f5() nounwind
br label %return
sw.bb9: ; preds = %entry
%call10 = tail call i32 @f6() nounwind
br label %return
return:
%retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], ... [ 0, %entry ]
ret i32 %retval.0
This allows codegen to generate better code like this:
LBB0_2: ## %sw.bb
jmp _f1 ## TAILCALL
LBB0_3: ## %sw.bb1
jmp _f2 ## TAILCALL
LBB0_4: ## %sw.bb3
jmp _f3 ## TAILCALL
rdar://9147433
Added:
llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=127953&r1=127952&r2=127953&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Sat Mar 19 12:17:39 2011
@@ -1287,6 +1287,14 @@
return false;
}
+ /// mayBeEmittedAsTailCall - Return true if the target may be able emit the
+ /// call instruction as a tail call. This is used by optimization passes to
+ /// determine if it's profitable to duplicate return instructions to enable
+ /// tailcall optimization.
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const {
+ return false;
+ }
+
/// getTypeForExtArgOrReturn - Return the type that should be used to zero or
/// sign extend a zeroext/signext integer argument or return value.
/// FIXME: Most C calling convention requires the return type to be promoted,
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=127953&r1=127952&r2=127953&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Sat Mar 19 12:17:39 2011
@@ -1805,6 +1805,16 @@
return HasRet;
}
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls)
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=127953&r1=127952&r2=127953&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Sat Mar 19 12:17:39 2011
@@ -457,6 +457,8 @@
virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=127953&r1=127952&r2=127953&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 19 12:17:39 2011
@@ -45,6 +45,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1595,6 +1596,18 @@
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
}
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=127953&r1=127952&r2=127953&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Mar 19 12:17:39 2011
@@ -843,6 +843,8 @@
virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
virtual EVT
getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const;
Modified: llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=127953&r1=127952&r2=127953&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp Sat Mar 19 12:17:39 2011
@@ -47,16 +47,17 @@
using namespace llvm::PatternMatch;
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
"sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
"of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -104,6 +105,7 @@
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
+ bool DupRetToEnableTailCallOpts(ReturnInst *RI);
};
}
@@ -547,6 +549,96 @@
return Simplifier.fold(CI, TD);
}
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+///
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+ Value *V = RI->getReturnValue();
+ if (!V)
+ return false;
+
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ BasicBlock *BB = RI->getParent();
+ if (PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Make sure there are no instructions between PHI and return.
+ BasicBlock::iterator BI = PN;
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI != RI)
+ return false;
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a
+ /// tail call.
+ SmallVector<CallInst*, 4> TailCalls;
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ if (CI && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch
+ // to the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ Changed = true;
+ ++NumRetsDup;
+ }
+
+ return Changed;
+ }
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
@@ -970,6 +1062,9 @@
if (CallInst *CI = dyn_cast<CallInst>(I))
return OptimizeCallInst(CI);
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ return DupRetToEnableTailCallOpts(RI);
+
return false;
}
Added: llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll?rev=127953&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll (added)
+++ llvm/trunk/test/CodeGen/X86/tailcall-cgp-dup.ll Sat Mar 19 12:17:39 2011
@@ -0,0 +1,63 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Teach CGP to dup returns to enable tail call optimization.
+; rdar://9147433
+
+define i32 @foo(i32 %x) nounwind ssp {
+; CHECK: foo:
+entry:
+ switch i32 %x, label %return [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb5
+ i32 5, label %sw.bb7
+ i32 6, label %sw.bb9
+ ]
+
+sw.bb: ; preds = %entry
+; CHECK: jmp _f1
+ %call = tail call i32 @f1() nounwind
+ br label %return
+
+sw.bb1: ; preds = %entry
+; CHECK: jmp _f2
+ %call2 = tail call i32 @f2() nounwind
+ br label %return
+
+sw.bb3: ; preds = %entry
+; CHECK: jmp _f3
+ %call4 = tail call i32 @f3() nounwind
+ br label %return
+
+sw.bb5: ; preds = %entry
+; CHECK: jmp _f4
+ %call6 = tail call i32 @f4() nounwind
+ br label %return
+
+sw.bb7: ; preds = %entry
+; CHECK: jmp _f5
+ %call8 = tail call i32 @f5() nounwind
+ br label %return
+
+sw.bb9: ; preds = %entry
+; CHECK: jmp _f6
+ %call10 = tail call i32 @f6() nounwind
+ br label %return
+
+return: ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb
+ %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @f1()
+
+declare i32 @f2()
+
+declare i32 @f3()
+
+declare i32 @f4()
+
+declare i32 @f5()
+
+declare i32 @f6()
More information about the llvm-commits
mailing list