[llvm] 825debe - [InlineCost] Fix infinite loop in indirect call evaluation

Ehud Katz via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 27 22:39:24 PST 2019


Author: Ehud Katz
Date: 2019-11-28T08:27:50+02:00
New Revision: 825debe847d15a5670eff54745a6691145ddfae1

URL: https://github.com/llvm/llvm-project/commit/825debe847d15a5670eff54745a6691145ddfae1
DIFF: https://github.com/llvm/llvm-project/commit/825debe847d15a5670eff54745a6691145ddfae1.diff

LOG: [InlineCost] Fix infinite loop in indirect call evaluation

Currently every time we encounter an indirect call of a known function,
we try to evaluate the inline cost of that function. In case of a
recursion, that evaluation never stops.

The solution I propose is to evaluate only the first indirect call of a
known function, while any further indirect calls (to known functions) are
treated just like direct function calls, which are never recursively
evaluated.

Fixes PR35469.

Differential Revision: https://reviews.llvm.org/D69349

Added: 
    llvm/test/Transforms/Inline/inline-indirect-chain.ll

Modified: 
    llvm/lib/Analysis/InlineCost.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 1ba03de69890..55ce940bc3a5 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -51,7 +51,7 @@ static cl::opt<int> InlineThreshold(
     cl::desc("Control the amount of inlining to perform (default = 225)"));
 
 static cl::opt<int> HintThreshold(
-    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, 
+    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
     cl::desc("Threshold for inlining functions with inline hint"));
 
 static cl::opt<int>
@@ -63,7 +63,7 @@ static cl::opt<int>
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
 static cl::opt<int> ColdThreshold(
-    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, 
+    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
     cl::desc("Threshold for inlining functions with cold attribute"));
 
 static cl::opt<int>
@@ -149,6 +149,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool HasUninlineableIntrinsic = false;
   bool InitsVargArgs = false;
 
+  /// Attempt to evaluate indirect calls to boost its inline cost.
+  bool BoostIndirectCalls;
+
   /// Number of bytes allocated statically by the callee.
   uint64_t AllocatedSize = 0;
   unsigned NumInstructions = 0;
@@ -295,13 +298,14 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
                ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
-               Function &Callee, CallBase &Call, const InlineParams &Params)
+               Function &Callee, CallBase &Call, const InlineParams &Params,
+               bool BoostIndirect = true)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
         CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
         ComputeFullInlineCost(OptComputeFullInlineCost ||
                               Params.ComputeFullInlineCost || ORE),
-        EnableLoadElimination(true) {}
+        BoostIndirectCalls(BoostIndirect), EnableLoadElimination(true) {}
 
   InlineResult analyzeCall(CallBase &Call);
 
@@ -423,9 +427,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
   Operands.push_back(GEP.getOperand(0));
   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
     if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
-       Operands.push_back(SimpleOp);
-     else
-       Operands.push_back(*I);
+      Operands.push_back(SimpleOp);
+    else
+      Operands.push_back(*I);
   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
 }
 
@@ -1239,97 +1243,93 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
-  if (Function *F = Call.getCalledFunction()) {
-    // When we have a concrete function, first try to simplify it directly.
-    if (simplifyCallSite(F, Call))
-      return true;
-
-    // Next check if it is an intrinsic we know about.
-    // FIXME: Lift this into part of the InstVisitor.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
-      switch (II->getIntrinsicID()) {
-      default:
-        if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
-          disableLoadElimination();
-        return Base::visitCallBase(Call);
-
-      case Intrinsic::load_relative:
-        // This is normally lowered to 4 LLVM instructions.
-        addCost(3 * InlineConstants::InstrCost);
-        return false;
+  Value *Callee = Call.getCalledOperand();
+  Function *F = dyn_cast_or_null<Function>(Callee);
+  bool IsIndirectCall = !F;
+  if (IsIndirectCall) {
+    // Check if this happens to be an indirect function call to a known function
+    // in this inline context. If not, we've done all we can.
+    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+    if (!F) {
+      // Pay the price of the argument setup. We account for the average 1
+      // instruction per call argument setup here.
+      addCost(Call.arg_size() * InlineConstants::InstrCost);
 
-      case Intrinsic::memset:
-      case Intrinsic::memcpy:
-      case Intrinsic::memmove:
+      if (!Call.onlyReadsMemory())
         disableLoadElimination();
-        // SROA can usually chew through these intrinsics, but they aren't free.
-        return false;
-      case Intrinsic::icall_branch_funnel:
-      case Intrinsic::localescape:
-        HasUninlineableIntrinsic = true;
-        return false;
-      case Intrinsic::vastart:
-        InitsVargArgs = true;
-        return false;
-      }
+      return Base::visitCallBase(Call);
     }
+  }
 
-    if (F == Call.getFunction()) {
-      // This flag will fully abort the analysis, so don't bother with anything
-      // else.
-      IsRecursiveCall = true;
-      return false;
-    }
+  assert(F && "Expected a call to a known function");
 
-    if (TTI.isLoweredToCall(F)) {
-      // We account for the average 1 instruction per call argument setup
-      // here.
-      addCost(Call.arg_size() * InlineConstants::InstrCost);
+  // When we have a concrete function, first try to simplify it directly.
+  if (simplifyCallSite(F, Call))
+    return true;
 
-      // Everything other than inline ASM will also have a significant cost
-      // merely from making the call.
-      if (!isa<InlineAsm>(Call.getCalledValue()))
-        addCost(InlineConstants::CallPenalty);
-    }
+  // Next check if it is an intrinsic we know about.
+  // FIXME: Lift this into part of the InstVisitor.
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+        disableLoadElimination();
+      return Base::visitCallBase(Call);
+
+    case Intrinsic::load_relative:
+      // This is normally lowered to 4 LLVM instructions.
+      addCost(3 * InlineConstants::InstrCost);
+      return false;
 
-    if (!Call.onlyReadsMemory())
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
       disableLoadElimination();
-    return Base::visitCallBase(Call);
+      // SROA can usually chew through these intrinsics, but they aren't free.
+      return false;
+    case Intrinsic::icall_branch_funnel:
+    case Intrinsic::localescape:
+      HasUninlineableIntrinsic = true;
+      return false;
+    case Intrinsic::vastart:
+      InitsVargArgs = true;
+      return false;
+    }
   }
 
-  // Otherwise we're in a very special case -- an indirect function call. See
-  // if we can be particularly clever about this.
-  Value *Callee = Call.getCalledValue();
-
-  // First, pay the price of the argument setup. We account for the average
-  // 1 instruction per call argument setup here.
-  addCost(Call.arg_size() * InlineConstants::InstrCost);
-
-  // Next, check if this happens to be an indirect function call to a known
-  // function in this inline context. If not, we've done all we can.
-  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
-  if (!F) {
-    if (!Call.onlyReadsMemory())
-      disableLoadElimination();
-    return Base::visitCallBase(Call);
+  if (F == Call.getFunction()) {
+    // This flag will fully abort the analysis, so don't bother with anything
+    // else.
+    IsRecursiveCall = true;
+    return false;
   }
 
-  // If we have a constant that we are calling as a function, we can peer
-  // through it and see the function target. This happens not infrequently
-  // during devirtualization and so we want to give it a hefty bonus for
-  // inlining, but cap that bonus in the event that inlining wouldn't pan
-  // out. Pretend to inline the function, with a custom threshold.
-  auto IndirectCallParams = Params;
-  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
-  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
-                  IndirectCallParams);
-  if (CA.analyzeCall(Call)) {
-    // We were able to inline the indirect call! Subtract the cost from the
-    // threshold to get the bonus we want to apply, but don't go below zero.
-    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+  if (TTI.isLoweredToCall(F)) {
+    // We account for the average 1 instruction per call argument setup here.
+    addCost(Call.arg_size() * InlineConstants::InstrCost);
+
+    // If we have a constant that we are calling as a function, we can peer
+    // through it and see the function target. This happens not infrequently
+    // during devirtualization and so we want to give it a hefty bonus for
+    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
+    // Pretend to inline the function, with a custom threshold.
+    if (IsIndirectCall && BoostIndirectCalls) {
+      auto IndirectCallParams = Params;
+      IndirectCallParams.DefaultThreshold =
+          InlineConstants::IndirectCallThreshold;
+      CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
+                      IndirectCallParams, false);
+      if (CA.analyzeCall(Call)) {
+        // We were able to inline the indirect call! Subtract the cost from the
+        // threshold to get the bonus we want to apply, but don't go below zero.
+        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
+      }
+    } else
+      // Otherwise simply add the cost for merely making the call.
+      addCost(InlineConstants::CallPenalty);
   }
 
-  if (!F->onlyReadsMemory())
+  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
     disableLoadElimination();
   return Base::visitCallBase(Call);
 }
@@ -1494,7 +1494,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
 
   int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
   int64_t SwitchCost =
-      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+    ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
 
   addCost(SwitchCost, (int64_t)CostUpperBound);
   return false;

diff  --git a/llvm/test/Transforms/Inline/inline-indirect-chain.ll b/llvm/test/Transforms/Inline/inline-indirect-chain.ll
new file mode 100644
index 000000000000..bf73ad35dade
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-indirect-chain.ll
@@ -0,0 +1,55 @@
+; RUN: opt -inline -early-cse < %s
+; This test used to crash (PR35469).
+
+define void @func1() {
+  %t = bitcast void ()* @func2 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func2() {
+  %t = bitcast void ()* @func3 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func3() {
+  %t = bitcast void ()* @func4 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func4() {
+  br i1 undef, label %left, label %right
+
+left:
+  %t = bitcast void ()* @func5 to void ()*
+  tail call void %t()
+  ret void
+
+right:
+  ret void
+}
+
+define void @func5() {
+  %t = bitcast void ()* @func6 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func6() {
+  %t = bitcast void ()* @func2 to void ()*
+  tail call void %t()
+  ret void
+}
+
+define void @func7() {
+  %t = bitcast void ()* @func3 to void ()*
+  tail call void @func8(void()* %t)
+  ret void
+}
+
+define void @func8(void()* %f) {
+  tail call void %f()
+  ret void
+}


        


More information about the llvm-commits mailing list