[llvm] 911e06f - [ICP] Handling must tail calls in indirect call promotion
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Sun May 3 10:42:50 PDT 2020
Author: Hongtao Yu
Date: 2020-05-03T10:42:22-07:00
New Revision: 911e06f5eba66516e87c73d8e76016e549051cd7
URL: https://github.com/llvm/llvm-project/commit/911e06f5eba66516e87c73d8e76016e549051cd7
DIFF: https://github.com/llvm/llvm-project/commit/911e06f5eba66516e87c73d8e76016e549051cd7.diff
LOG: [ICP] Handling must tail calls in indirect call promotion
Per the IR convention, a musttail call must precede a ret with an optional bitcast. This was violated by the indirect call promotion optimization, which could produce IR like:
; <label>:2192:
br i1 %2198, label %2199, label %2201, !dbg !226012, !prof !229483
; <label>:2199: ; preds = %2192
musttail call fastcc void @foo(i8* %2195), !dbg !226012
br label %2202, !dbg !226012
; <label>:2201: ; preds = %2192
musttail call fastcc void %2197(i8* %2195), !dbg !226012
br label %2202, !dbg !226012
; <label>:2202: ; preds = %605, %2201, %2199
ret void, !dbg !229485
This change fixes that by duplicating the return statement along with the promoted indirect call, so each musttail call is immediately followed by its own ret. The generated code now looks like:
; <label>:2192:
br i1 %2198, label %2199, label %2201, !dbg !226012, !prof !229483
; <label>:2199: ; preds = %2192
musttail call fastcc void @foo(i8* %2195), !dbg !226012
ret void, !dbg !229485
; <label>:2201: ; preds = %2192
musttail call fastcc void %2197(i8* %2195), !dbg !226012
ret void, !dbg !229485
Differential Revision: https://reviews.llvm.org/D79258
Added:
llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll
Modified:
llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index ac61603e3f34..a7a0290ee475 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -255,6 +255,34 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
/// br %normal_dst
///
+/// An indirect musttail call is processed slightly differently in that:
+/// 1. No merge block needed for the orginal and the cloned callsite, since
+/// either one ends the flow. No phi node is needed either.
+/// 2. The return statement following the original call site is duplicated too
+/// and placed immediately after the cloned call site per the IR convention.
+///
+/// For example, the musttail call instruction below:
+///
+/// orig_bb:
+/// %t0 = musttail call i32 %ptr()
+/// ...
+///
+/// Is replaced by the following:
+///
+/// cond_bb:
+/// %cond = icmp eq i32 ()* %ptr, @func
+/// br i1 %cond, %then_bb, %orig_bb
+///
+/// then_bb:
+/// ; The clone of the original call instruction is placed in the "then"
+/// ; block. It is not yet promoted.
+/// %t1 = musttail call i32 %ptr()
+/// ret %t1
+///
+/// orig_bb:
+/// ; The original call instruction stays in its original block.
+/// %t0 = musttail call i32 %ptr()
+/// ret %t0
static CallBase &versionCallSite(CallBase &CB, Value *Callee,
MDNode *BranchWeights) {
@@ -268,6 +296,44 @@ static CallBase &versionCallSite(CallBase &CB, Value *Callee,
Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
+ if (OrigInst->isMustTailCall()) {
+ // Create an if-then structure. The original instruction stays in its block,
+ // and a clone of the original instruction is placed in the "then" block.
+ Instruction *ThenTerm =
+ SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ ThenBlock->setName("if.true.direct_targ");
+ CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+ NewInst->insertBefore(ThenTerm);
+
+ // Place a clone of the optional bitcast after the new call site.
+ Value *NewRetVal = NewInst;
+ auto Next = OrigInst->getNextNode();
+ if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
+ assert(BitCast->getOperand(0) == OrigInst &&
+ "bitcast following musttail call must use the call");
+ auto NewBitCast = BitCast->clone();
+ NewBitCast->replaceUsesOfWith(OrigInst, NewInst);
+ NewBitCast->insertBefore(ThenTerm);
+ NewRetVal = NewBitCast;
+ Next = BitCast->getNextNode();
+ }
+
+ // Place a clone of the return instruction after the new call site.
+ ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+ assert(Ret && "musttail call must precede a ret with an optional bitcast");
+ auto NewRet = Ret->clone();
+ if (Ret->getReturnValue())
+ NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
+ NewRet->insertBefore(ThenTerm);
+
+ // A return instructions is terminating, so we don't need the terminator
+ // instruction just created.
+ ThenTerm->eraseFromParent();
+
+ return *NewInst;
+ }
+
// Create an if-then-else structure. The original instruction is moved into
// the "else" block, and a clone of the original instruction is placed in the
// "then" block.
diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll
new file mode 100644
index 000000000000..e79e533c4af2
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32* ()* null, align 8
+
+declare i32* @func1()
+
+declare i32* @func2()
+
+declare i32* @func3()
+
+declare i32* @func4()
+
+define i32* @bar() {
+entry:
+ %tmp = load i32* ()*, i32* ()** @foo, align 8
+; ICALL-PROM: [[CMP1:%[0-9]+]] = icmp eq i32* ()* %tmp, @func4
+; ICALL-PROM: br i1 [[CMP1]], label %if.true.direct_targ, label %[[L1:[0-9]+]], !prof [[BRANCH_WEIGHT1:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ:
+; ICALL-PROM: [[DIRCALL_RET1:%[0-9]+]] = musttail call i32* @func4()
+; ICALL-PROM: ret i32* [[DIRCALL_RET1]]
+; ICALL-PROM: [[L1]]:
+; ICALL-PROM: [[CMP2:%[0-9]+]] = icmp eq i32* ()* %tmp, @func2
+; ICALL-PROM: br i1 [[CMP2]], label %if.true.direct_targ1, label %[[L2:[0-9]+]], !prof [[BRANCH_WEIGHT2:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ1:
+; ICALL-PROM: [[DIRCALL_RET2:%[0-9]+]] = musttail call i32* @func2()
+; ICALL-PROM: ret i32* [[DIRCALL_RET2]]
+; ICALL-PROM: [[L2]]:
+; ICALL-PROM: [[CMP3:%[0-9]+]] = icmp eq i32* ()* %tmp, @func3
+; ICALL-PROM: br i1 [[CMP3]], label %if.true.direct_targ2, label %[[L3:[0-9]+]], !prof [[BRANCH_WEIGHT3:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ2:
+; ICALL-PROM: [[DIRCALL_RET3:%[0-9]+]] = musttail call i32* @func3()
+; ICALL-PROM: ret i32* [[DIRCALL_RET3]]
+; ICALL-PROM: [[L3]]:
+; ICALL-PROM: %call = musttail call i32* %tmp()
+; ICALL-PROM: ret i32* %call
+ %call = musttail call i32* %tmp(), !prof !1
+ ret i32* %call
+}
+
+define i64* @bar2() {
+entry:
+ %tmp = load i32* ()*, i32* ()** @foo, align 8
+; ICALL-PROM: [[CMP1:%[0-9]+]] = icmp eq i32* ()* %tmp, @func4
+; ICALL-PROM: br i1 [[CMP1]], label %if.true.direct_targ, label %[[L4:[0-9]+]], !prof [[BRANCH_WEIGHT4:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ:
+; ICALL-PROM: [[DIRCALL_RET1:%[0-9]+]] = musttail call i32* @func4()
+; ICALL-PROM: [[DIRCALL_RET2:%[0-9]+]] = bitcast i32* [[DIRCALL_RET1]] to i64*
+; ICALL-PROM: ret i64* [[DIRCALL_RET2]]
+; ICALL-PROM: [[L4]]:
+; ICALL-PROM: %call = musttail call i32* %tmp()
+; ICALL-PROM: %rv = bitcast i32* %call to i64*
+; ICALL-PROM: ret i64* %rv
+ %call = musttail call i32* %tmp(), !prof !2
+ %rv = bitcast i32* %call to i64*
+ ret i64* %rv
+}
+
+!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10}
+!2 = !{!"VP", i32 0, i64 100, i64 7651369219802541373, i64 100}
+
+; ICALL-PROM: [[BRANCH_WEIGHT1]] = !{!"branch_weights", i32 1030, i32 570}
+; ICALL-PROM: [[BRANCH_WEIGHT2]] = !{!"branch_weights", i32 410, i32 160}
+; ICALL-PROM: [[BRANCH_WEIGHT3]] = !{!"branch_weights", i32 150, i32 10}
+; ICALL-PROM: [[BRANCH_WEIGHT4]] = !{!"branch_weights", i32 100, i32 0}
More information about the llvm-commits
mailing list