[llvm] 911e06f - [ICP] Handling must tail calls in indirect call promotion

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Sun May 3 10:42:50 PDT 2020


Author: Hongtao Yu
Date: 2020-05-03T10:42:22-07:00
New Revision: 911e06f5eba66516e87c73d8e76016e549051cd7

URL: https://github.com/llvm/llvm-project/commit/911e06f5eba66516e87c73d8e76016e549051cd7
DIFF: https://github.com/llvm/llvm-project/commit/911e06f5eba66516e87c73d8e76016e549051cd7.diff

LOG: [ICP] Handling must tail calls in indirect call promotion

Per the IR convention, a musttail call must precede a ret with an optional bitcast. This was violated by the indirect call promotion optimization, which could result in IR like:

    ; <label>:2192:
      br i1 %2198, label %2199, label %2201, !dbg !226012, !prof !229483

    ; <label>:2199:                                   ; preds = %2192
      musttail call fastcc void @foo(i8* %2195), !dbg !226012
      br label %2202, !dbg !226012

    ; <label>:2201:                                   ; preds = %2192
      musttail call fastcc void %2197(i8* %2195), !dbg !226012
      br label %2202, !dbg !226012

    ; <label>:2202:                                   ; preds = %605, %2201, %2199
      ret void, !dbg !229485

This is being fixed in this change where the return statement goes together with the promoted indirect call. The code generated is like:

    ; <label>:2192:
      br i1 %2198, label %2199, label %2201, !dbg !226012, !prof !229483

    ; <label>:2199:                                   ; preds = %2192
      musttail call fastcc void @foo(i8* %2195), !dbg !226012
      ret void, !dbg !229485

    ; <label>:2201:                                   ; preds = %2192
      musttail call fastcc void %2197(i8* %2195), !dbg !226012
      ret void, !dbg !229485

Differential Revision: https://reviews.llvm.org/D79258

Added: 
    llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll

Modified: 
    llvm/lib/Transforms/Utils/CallPromotionUtils.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index ac61603e3f34..a7a0290ee475 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -255,6 +255,34 @@ static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) {
 ///     %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ]
 ///     br %normal_dst
 ///
+/// An indirect musttail call is processed slightly differently in that:
+/// 1. No merge block needed for the orginal and the cloned callsite, since
+///    either one ends the flow. No phi node is needed either.
+/// 2. The return statement following the original call site is duplicated too
+///    and placed immediately after the cloned call site per the IR convention.
+///
+/// For example, the musttail call instruction below:
+///
+///   orig_bb:
+///     %t0 = musttail call i32 %ptr()
+///     ...
+///
+/// Is replaced by the following:
+///
+///   cond_bb:
+///     %cond = icmp eq i32 ()* %ptr, @func
+///     br i1 %cond, %then_bb, %orig_bb
+///
+///   then_bb:
+///     ; The clone of the original call instruction is placed in the "then"
+///     ; block. It is not yet promoted.
+///     %t1 = musttail call i32 %ptr()
+///     ret %t1
+///
+///   orig_bb:
+///     ; The original call instruction stays in its original block.
+///     %t0 = musttail call i32 %ptr()
+///     ret %t0
 static CallBase &versionCallSite(CallBase &CB, Value *Callee,
                                  MDNode *BranchWeights) {
 
@@ -268,6 +296,44 @@ static CallBase &versionCallSite(CallBase &CB, Value *Callee,
     Callee = Builder.CreateBitCast(Callee, CB.getCalledOperand()->getType());
   auto *Cond = Builder.CreateICmpEQ(CB.getCalledOperand(), Callee);
 
+  if (OrigInst->isMustTailCall()) {
+    // Create an if-then structure. The original instruction stays in its block,
+    // and a clone of the original instruction is placed in the "then" block.
+    Instruction *ThenTerm =
+        SplitBlockAndInsertIfThen(Cond, &CB, false, BranchWeights);
+    BasicBlock *ThenBlock = ThenTerm->getParent();
+    ThenBlock->setName("if.true.direct_targ");
+    CallBase *NewInst = cast<CallBase>(OrigInst->clone());
+    NewInst->insertBefore(ThenTerm);
+
+    // Place a clone of the optional bitcast after the new call site.
+    Value *NewRetVal = NewInst;
+    auto Next = OrigInst->getNextNode();
+    if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Next)) {
+      assert(BitCast->getOperand(0) == OrigInst &&
+             "bitcast following musttail call must use the call");
+      auto NewBitCast = BitCast->clone();
+      NewBitCast->replaceUsesOfWith(OrigInst, NewInst);
+      NewBitCast->insertBefore(ThenTerm);
+      NewRetVal = NewBitCast;
+      Next = BitCast->getNextNode();
+    }
+
+    // Place a clone of the return instruction after the new call site.
+    ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+    assert(Ret && "musttail call must precede a ret with an optional bitcast");
+    auto NewRet = Ret->clone();
+    if (Ret->getReturnValue())
+      NewRet->replaceUsesOfWith(Ret->getReturnValue(), NewRetVal);
+    NewRet->insertBefore(ThenTerm);
+
+    // A return instructions is terminating, so we don't need the terminator
+    // instruction just created.
+    ThenTerm->eraseFromParent();
+
+    return *NewInst;
+  }
+
   // Create an if-then-else structure. The original instruction is moved into
   // the "else" block, and a clone of the original instruction is placed in the
   // "then" block.

diff  --git a/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll
new file mode 100644
index 000000000000..e79e533c4af2
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/indirect_call_promotion_musttail.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32* ()* null, align 8
+
+declare i32* @func1()
+
+declare i32* @func2()
+
+declare i32* @func3()
+
+declare i32* @func4()
+
+define i32* @bar() {
+entry:
+  %tmp = load i32* ()*, i32* ()** @foo, align 8
+; ICALL-PROM:   [[CMP1:%[0-9]+]] = icmp eq i32* ()* %tmp, @func4
+; ICALL-PROM:   br i1 [[CMP1]], label %if.true.direct_targ, label %[[L1:[0-9]+]], !prof [[BRANCH_WEIGHT1:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ:
+; ICALL-PROM:   [[DIRCALL_RET1:%[0-9]+]] = musttail call i32* @func4()
+; ICALL-PROM:   ret i32* [[DIRCALL_RET1]]
+; ICALL-PROM: [[L1]]:
+; ICALL-PROM:   [[CMP2:%[0-9]+]] = icmp eq i32* ()* %tmp, @func2
+; ICALL-PROM:   br i1 [[CMP2]], label %if.true.direct_targ1, label %[[L2:[0-9]+]], !prof [[BRANCH_WEIGHT2:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ1:
+; ICALL-PROM:   [[DIRCALL_RET2:%[0-9]+]] = musttail call i32* @func2()
+; ICALL-PROM:   ret i32* [[DIRCALL_RET2]]
+; ICALL-PROM: [[L2]]:
+; ICALL-PROM:   [[CMP3:%[0-9]+]] = icmp eq i32* ()* %tmp, @func3
+; ICALL-PROM:   br i1 [[CMP3]], label %if.true.direct_targ2, label %[[L3:[0-9]+]], !prof [[BRANCH_WEIGHT3:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ2:
+; ICALL-PROM:   [[DIRCALL_RET3:%[0-9]+]] = musttail call i32* @func3()
+; ICALL-PROM:   ret i32* [[DIRCALL_RET3]]
+; ICALL-PROM: [[L3]]:
+; ICALL-PROM:   %call = musttail call i32* %tmp()
+; ICALL-PROM:   ret i32* %call
+  %call = musttail call i32* %tmp(), !prof !1
+  ret i32* %call
+}
+
+define i64* @bar2() {
+entry:
+  %tmp = load i32* ()*, i32* ()** @foo, align 8
+; ICALL-PROM:   [[CMP1:%[0-9]+]] = icmp eq i32* ()* %tmp, @func4
+; ICALL-PROM:   br i1 [[CMP1]], label %if.true.direct_targ, label %[[L4:[0-9]+]], !prof [[BRANCH_WEIGHT4:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ:
+; ICALL-PROM:   [[DIRCALL_RET1:%[0-9]+]] = musttail call i32* @func4()
+; ICALL-PROM:   [[DIRCALL_RET2:%[0-9]+]] = bitcast i32* [[DIRCALL_RET1]] to i64*
+; ICALL-PROM:   ret i64* [[DIRCALL_RET2]]
+; ICALL-PROM: [[L4]]:
+; ICALL-PROM:   %call = musttail call i32* %tmp()
+; ICALL-PROM:   %rv = bitcast i32* %call to i64*
+; ICALL-PROM:   ret i64* %rv
+  %call = musttail call i32* %tmp(), !prof !2
+  %rv = bitcast i32* %call to i64*
+  ret i64* %rv
+}
+
+!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10}
+!2 = !{!"VP", i32 0, i64 100, i64 7651369219802541373, i64 100}
+
+; ICALL-PROM: [[BRANCH_WEIGHT1]] = !{!"branch_weights", i32 1030, i32 570}
+; ICALL-PROM: [[BRANCH_WEIGHT2]] = !{!"branch_weights", i32 410, i32 160}
+; ICALL-PROM: [[BRANCH_WEIGHT3]] = !{!"branch_weights", i32 150, i32 10}
+; ICALL-PROM: [[BRANCH_WEIGHT4]] = !{!"branch_weights", i32 100, i32 0}


        


More information about the llvm-commits mailing list