[llvm] f46dd19 - [mlgo] Incrementally update FunctionPropertiesInfo during inlining

Tue May 31 17:27:40 PDT 2022

Author: Mircea Trofin
Date: 2022-05-31T17:27:32-07:00
New Revision: f46dd19b480496d2ba0a57d12935882e530f2b93

URL: https://github.com/llvm/llvm-project/commit/f46dd19b480496d2ba0a57d12935882e530f2b93
DIFF: https://github.com/llvm/llvm-project/commit/f46dd19b480496d2ba0a57d12935882e530f2b93.diff

LOG: [mlgo] Incrementally update FunctionPropertiesInfo during inlining

Re-computing FunctionPropertiesInfo after each inlining may be very time
consuming: in certain cases, e.g. large caller with lots of callsites,
and when the overall IR doesn't increase (thus not tripping a size bloat
threshold).

This patch addresses this by incrementally updating
FunctionPropertiesInfo.

Differential Revision: https://reviews.llvm.org/D125841

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
    llvm/include/llvm/Analysis/MLInlineAdvisor.h
    llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
    llvm/lib/Analysis/MLInlineAdvisor.cpp
    llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
index b1b37ad00e93..1eb02de59b71 100644

--- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -14,6 +14,9 @@
 #ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
 #define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
 
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
@@ -21,10 +24,23 @@ class Function;
 class LoopInfo;
 
 class FunctionPropertiesInfo {
+  friend class FunctionPropertiesUpdater;
+  void updateForBB(const BasicBlock &BB, int64_t Direction);
+  void updateAggregateStats(const Function &F, const LoopInfo &LI);
+  void reIncludeBB(const BasicBlock &BB, const LoopInfo &LI);
+
 public:
   static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F,
                                                           const LoopInfo &LI);
 
+  bool operator==(const FunctionPropertiesInfo &FPI) const {
+    return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
+  }
+
+  bool operator!=(const FunctionPropertiesInfo &FPI) const {
+    return !(*this == FPI);
+  }
+
   void print(raw_ostream &OS) const;
 
   /// Number of basic blocks
@@ -57,6 +73,9 @@ class FunctionPropertiesInfo {
 
   // Number of Top Level Loops in the Function
   int64_t TopLevelLoopCount = 0;
+
+  // All non-debug instructions
+  int64_t TotalInstructionCount = 0;
 };
 
 // Analysis pass
@@ -82,5 +101,24 @@ class FunctionPropertiesPrinterPass
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
+/// Correctly update FunctionPropertiesInfo post-inlining. A
+/// FunctionPropertiesUpdater keeps the state necessary for tracking the changes
+/// llvm::InlineFunction makes. The idea is that inlining will at most modify
+/// a few BBs of the Caller (maybe the entry BB and definitely the callsite BB)
+/// and potentially affect exception handling BBs in the case of invoke
+/// inlining.
+class FunctionPropertiesUpdater {
+public:
+  FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, const CallBase &CB);
+
+  void finish(const LoopInfo &LI);
+
+private:
+  FunctionPropertiesInfo &FPI;
+  const BasicBlock &CallSiteBB;
+  const Function &Caller;
+
+  DenseSet<const BasicBlock *> Successors;
+};
 } // namespace llvm
 #endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H

diff  --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 1daa919c66e2..0b416ee57a94 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_ANALYSIS_MLINLINEADVISOR_H
 #define LLVM_ANALYSIS_MLINLINEADVISOR_H
 
+#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/MLModelRunner.h"
@@ -33,13 +34,17 @@ class MLInlineAdvisor : public InlineAdvisor {
   void onPassEntry() override;
   void onPassExit(LazyCallGraph::SCC *SCC) override;
 
-  int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); }
+  int64_t getIRSize(Function &F) const {
+    return getCachedFPI(F).TotalInstructionCount;
+  }
   void onSuccessfulInlining(const MLInlineAdvice &Advice,
                             bool CalleeWasDeleted);
 
   bool isForcedToStop() const { return ForceStop; }
   int64_t getLocalCalls(Function &F);
   const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
+  FunctionPropertiesInfo &getCachedFPI(Function &) const;
+  const LoopInfo &getLoopInfo(Function &F) const;
 
 protected:
   std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
@@ -67,6 +72,8 @@ class MLInlineAdvisor : public InlineAdvisor {
        << "\n";
   }
 
+  mutable DenseMap<const Function *, FunctionPropertiesInfo> FPICache;
+
   LazyCallGraph &CG;
 
   int64_t NodeCount = 0;
@@ -86,16 +93,7 @@ class MLInlineAdvisor : public InlineAdvisor {
 class MLInlineAdvice : public InlineAdvice {
 public:
   MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
-                 OptimizationRemarkEmitter &ORE, bool Recommendation)
-      : InlineAdvice(Advisor, CB, ORE, Recommendation),
-        CallerIRSize(Advisor->isForcedToStop() ? 0
-                                               : Advisor->getIRSize(*Caller)),
-        CalleeIRSize(Advisor->isForcedToStop() ? 0
-                                               : Advisor->getIRSize(*Callee)),
-        CallerAndCalleeEdges(Advisor->isForcedToStop()
-                                 ? 0
-                                 : (Advisor->getLocalCalls(*Caller) +
-                                    Advisor->getLocalCalls(*Callee))) {}
+                 OptimizationRemarkEmitter &ORE, bool Recommendation);
   virtual ~MLInlineAdvice() = default;
 
   void recordInliningImpl() override;
@@ -112,10 +110,14 @@ class MLInlineAdvice : public InlineAdvice {
 
 private:
   void reportContextForRemark(DiagnosticInfoOptimizationBase &OR);
-
+  void updateCachedCallerFPI();
   MLInlineAdvisor *getAdvisor() const {
     return static_cast<MLInlineAdvisor *>(Advisor);
   };
+  // Make a copy of the FPI of the caller right before inlining. If inlining
+  // fails, we can just update the cache with that value.
+  const FunctionPropertiesInfo PreInlineCallerFPI;
+  Optional<FunctionPropertiesUpdater> FPU;
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 1dc90422ca7b..66498f4914f0 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -12,49 +12,75 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Instructions.h"
+#include <deque>
 
 using namespace llvm;
 
-FunctionPropertiesInfo
-FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
-                                                  const LoopInfo &LI) {
-
-  FunctionPropertiesInfo FPI;
+namespace {
+int64_t getNrBlocksFromCond(const BasicBlock &BB) {
+  int64_t Ret = 0;
+  if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
+    if (BI->isConditional())
+      Ret += BI->getNumSuccessors();
+  } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
+    Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
+  }
+  return Ret;
+}
 
-  FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
+int64_t getUses(const Function &F) {
+  return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
+}
+} // namespace
 
-  for (const auto &BB : F) {
-    ++FPI.BasicBlockCount;
+void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB,
+                                         const LoopInfo &LI) {
+  updateForBB(BB, +1);
+  MaxLoopDepth =
+      std::max(MaxLoopDepth, static_cast<int64_t>(LI.getLoopDepth(&BB)));
+}
 
-    if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
-      if (BI->isConditional())
-        FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
-    } else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
-      FPI.BlocksReachedFromConditionalInstruction +=
-          (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
+void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
+                                         int64_t Direction) {
+  assert(Direction == 1 || Direction == -1);
+  BasicBlockCount += Direction;
+  BlocksReachedFromConditionalInstruction +=
+      (Direction * getNrBlocksFromCond(BB));
+  for (const auto &I : BB) {
+    if (auto *CS = dyn_cast<CallBase>(&I)) {
+      const auto *Callee = CS->getCalledFunction();
+      if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
+        DirectCallsToDefinedFunctions += Direction;
     }
-
-    for (const auto &I : BB) {
-      if (auto *CS = dyn_cast<CallBase>(&I)) {
-        const auto *Callee = CS->getCalledFunction();
-        if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
-          ++FPI.DirectCallsToDefinedFunctions;
-      }
-      if (I.getOpcode() == Instruction::Load) {
-        ++FPI.LoadInstCount;
-      } else if (I.getOpcode() == Instruction::Store) {
-        ++FPI.StoreInstCount;
-      }
+    if (I.getOpcode() == Instruction::Load) {
+      LoadInstCount += Direction;
+    } else if (I.getOpcode() == Instruction::Store) {
+      StoreInstCount += Direction;
     }
-    // Loop Depth of the Basic Block
-    int64_t LoopDepth;
-    LoopDepth = LI.getLoopDepth(&BB);
-    if (FPI.MaxLoopDepth < LoopDepth)
-      FPI.MaxLoopDepth = LoopDepth;
   }
-  FPI.TopLevelLoopCount += llvm::size(LI);
+  TotalInstructionCount += Direction * BB.sizeWithoutDebug();
+}
+
+void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
+                                                  const LoopInfo &LI) {
+
+  Uses = getUses(F);
+  TopLevelLoopCount = llvm::size(LI);
+}
+
+FunctionPropertiesInfo
+FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
+                                                  const LoopInfo &LI) {
+
+  FunctionPropertiesInfo FPI;
+  for (const auto &BB : F)
+    if (!pred_empty(&BB) || BB.isEntryBlock())
+      FPI.reIncludeBB(BB, LI);
+  FPI.updateAggregateStats(F, LI);
   return FPI;
 }
 
@@ -68,7 +94,8 @@ void FunctionPropertiesInfo::print(raw_ostream &OS) const {
      << "LoadInstCount: " << LoadInstCount << "\n"
      << "StoreInstCount: " << StoreInstCount << "\n"
      << "MaxLoopDepth: " << MaxLoopDepth << "\n"
-     << "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n";
+     << "TopLevelLoopCount: " << TopLevelLoopCount << "\n"
+     << "TotalInstructionCount: " << TotalInstructionCount << "\n\n";
 }
 
 AnalysisKey FunctionPropertiesAnalysis::Key;
@@ -87,3 +114,60 @@ FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
   AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
   return PreservedAnalyses::all();
 }
+
+FunctionPropertiesUpdater::FunctionPropertiesUpdater(
+    FunctionPropertiesInfo &FPI, const CallBase &CB)
+    : FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {
+
+  // For BBs that are likely to change, we subtract from feature totals their
+  // contribution. Some features, like max loop counts or depths, are left
+  // invalid, as they will be updated post-inlining.
+  SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs;
+  // The CB BB will change - it'll either be split or the callee's body (single
+  // BB) will be pasted in.
+  LikelyToChangeBBs.insert(&CallSiteBB);
+
+  // The caller's entry BB may change due to new alloca instructions.
+  LikelyToChangeBBs.insert(&*Caller.begin());
+
+  // The successors may become unreachable in the case of `invoke` inlining.
+  // We track successors separately, too, because they form a boundary, together
+  // with the CB BB ('Entry') between which the inlined callee will be pasted.
+  Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB));
+  for (const auto *BB : Successors)
+    LikelyToChangeBBs.insert(BB);
+
+  // Commit the change. While some of the BBs accounted for above may play dual
+  // role - e.g. caller's entry BB may be the same as the callsite BB - set
+  // insertion semantics make sure we account them once. This needs to be
+  // followed in `finish`, too.
+  for (const auto *BB : LikelyToChangeBBs)
+    FPI.updateForBB(*BB, -1);
+}
+
+void FunctionPropertiesUpdater::finish(const LoopInfo &LI) {
+  DenseSet<const BasicBlock *> ReIncluded;
+  std::deque<const BasicBlock *> Worklist;
+
+  if (&CallSiteBB != &*Caller.begin()) {
+    FPI.reIncludeBB(*Caller.begin(), LI);
+    ReIncluded.insert(&*Caller.begin());
+  }
+
+  // Update feature values from the BBs that were copied from the callee, or
+  // might have been modified because of inlining. The latter have been
+  // subtracted in the FunctionPropertiesUpdater ctor.
+  Worklist.push_back(&CallSiteBB);
+  while (!Worklist.empty()) {
+    const auto *BB = Worklist.front();
+    Worklist.pop_front();
+    if (!ReIncluded.insert(BB).second)
+      continue;
+    FPI.reIncludeBB(*BB, LI);
+    if (!Successors.contains(BB))
+      for (const auto *Succ : successors(BB))
+        Worklist.push_back(Succ);
+  }
+  FPI.updateAggregateStats(Caller, LI);
+  assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, LI));
+}

diff  --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index cc454dd3687d..b3bfe5efb401 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InlineModelFeatureMaps.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MLModelRunner.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -132,6 +133,7 @@ unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
 }
 
 void MLInlineAdvisor::onPassEntry() {
+  FPICache.clear();
   // Function passes executed between InlinerPass runs may have changed the
   // module-wide features.
   // The cgscc pass manager rules are such that:
@@ -170,6 +172,8 @@ void MLInlineAdvisor::onPassEntry() {
 }
 
 void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
+  // No need to keep this around - function passes will invalidate it.
+  FPICache.clear();
   if (!LastSCC)
     return;
   // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
@@ -187,8 +191,7 @@ void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
 }
 
 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
-  return FAM.getResult<FunctionPropertiesAnalysis>(F)
-      .DirectCallsToDefinedFunctions;
+  return getCachedFPI(F).DirectCallsToDefinedFunctions;
 }
 
 // Update the internal state of the advisor, and force invalidate feature
@@ -220,15 +223,13 @@ void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
   // For edges, we 'forget' the edges that the caller and callee used to have
   // before inlining, and add back what they currently have together.
   int64_t NewCallerAndCalleeEdges =
-      FAM.getResult<FunctionPropertiesAnalysis>(*Caller)
-          .DirectCallsToDefinedFunctions;
+      getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
 
   if (CalleeWasDeleted)
     --NodeCount;
   else
     NewCallerAndCalleeEdges +=
-        FAM.getResult<FunctionPropertiesAnalysis>(*Callee)
-            .DirectCallsToDefinedFunctions;
+        getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
   EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
   assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
 }
@@ -241,6 +242,15 @@ int64_t MLInlineAdvisor::getModuleIRSize() const {
   return Ret;
 }
 
+FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
+  auto InsertPair =
+      FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
+  if (!InsertPair.second)
+    return InsertPair.first->second;
+  InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
+  return InsertPair.first->second;
+}
+
 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
   auto &Caller = *CB.getCaller();
   auto &Callee = *CB.getCalledFunction();
@@ -300,8 +310,8 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
     NrCtantParams += (isa<Constant>(*I));
   }
 
-  auto &CallerBefore = FAM.getResult<FunctionPropertiesAnalysis>(Caller);
-  auto &CalleeBefore = FAM.getResult<FunctionPropertiesAnalysis>(Callee);
+  auto &CallerBefore = getCachedFPI(Caller);
+  auto &CalleeBefore = getCachedFPI(Callee);
 
   *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
       CalleeBefore.BasicBlockCount;
@@ -354,11 +364,30 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
   return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
 }
 
+const LoopInfo &MLInlineAdvisor::getLoopInfo(Function &F) const {
+  return FAM.getResult<LoopAnalysis>(F);
+}
+
 std::unique_ptr<MLInlineAdvice>
 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
   return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
 }
 
+MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
+                               OptimizationRemarkEmitter &ORE,
+                               bool Recommendation)
+    : InlineAdvice(Advisor, CB, ORE, Recommendation),
+      CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
+      CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
+      CallerAndCalleeEdges(Advisor->isForcedToStop()
+                               ? 0
+                               : (Advisor->getLocalCalls(*Caller) +
+                                  Advisor->getLocalCalls(*Callee))),
+      PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
+  if (Recommendation)
+    FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
+}
+
 void MLInlineAdvice::reportContextForRemark(
     DiagnosticInfoOptimizationBase &OR) {
   using namespace ore;
@@ -369,7 +398,12 @@ void MLInlineAdvice::reportContextForRemark(
   OR << NV("ShouldInline", isInliningRecommended());
 }
 
+void MLInlineAdvice::updateCachedCallerFPI() {
+  FPU->finish(getAdvisor()->getLoopInfo(*Caller));
+}
+
 void MLInlineAdvice::recordInliningImpl() {
+  updateCachedCallerFPI();
   ORE.emit([&]() {
     OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
     reportContextForRemark(R);
@@ -379,6 +413,7 @@ void MLInlineAdvice::recordInliningImpl() {
 }
 
 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
+  updateCachedCallerFPI();
   ORE.emit([&]() {
     OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
                          Block);
@@ -390,6 +425,7 @@ void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
 
 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
     const InlineResult &Result) {
+  getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
   ORE.emit([&]() {
     OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
                                DLoc, Block);
@@ -398,6 +434,7 @@ void MLInlineAdvice::recordUnsuccessfulInliningImpl(
   });
 }
 void MLInlineAdvice::recordUnattemptedInliningImpl() {
+  assert(!FPU);
   ORE.emit([&]() {
     OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
     reportContextForRemark(R);

diff  --git a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
index 6ed7657f08f7..76a63c00a9a1 100644
--- a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
@@ -7,14 +7,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "gtest/gtest.h"
+#include <cstring>
 
 using namespace llvm;
 namespace {
@@ -37,6 +43,15 @@ class FunctionPropertiesAnalysisTest : public testing::Test {
       Err.print("MLAnalysisTests", errs());
     return Mod;
   }
+  
+  CallBase* findCall(Function& F, const char* Name = nullptr) {
+    for (auto &BB : F)
+      for (auto &I : BB )
+        if (auto *CB = dyn_cast<CallBase>(&I))
+          if (!Name || CB->getName() == Name)
+            return CB;
+    return nullptr;
+  }
 };
 
 TEST_F(FunctionPropertiesAnalysisTest, BasicTest) {
@@ -91,4 +106,441 @@ define internal i32 @top() {
   EXPECT_EQ(BranchesFeatures.MaxLoopDepth, 0);
   EXPECT_EQ(BranchesFeatures.TopLevelLoopCount, 0);
 }
+
+TEST_F(FunctionPropertiesAnalysisTest, InlineSameBBSimple) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+define i32 @f1(i32 %a) {
+  %b = call i32 @f2(i32 %a)
+  %c = add i32 %b, 2
+  ret i32 %c
+}
+
+define i32 @f2(i32 %a) {
+  %b = add i32 %a, 1
+  ret i32 %b
+}
+)IR");
+
+  Function *F1 = M->getFunction("f1");
+  CallBase* CB = findCall(*F1, "b");
+  EXPECT_NE(CB, nullptr);
+
+  FunctionPropertiesInfo ExpectedInitial;
+  ExpectedInitial.BasicBlockCount = 1;
+  ExpectedInitial.TotalInstructionCount = 3;
+  ExpectedInitial.Uses = 1;
+  ExpectedInitial.DirectCallsToDefinedFunctions = 1;
+
+  FunctionPropertiesInfo ExpectedFinal = ExpectedInitial;
+  ExpectedFinal.DirectCallsToDefinedFunctions = 0;
+
+  auto FPI = buildFPI(*F1);
+  EXPECT_EQ(FPI, ExpectedInitial);
+
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  FPU.finish(*LI);
+  EXPECT_EQ(FPI, ExpectedFinal);
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, InlineSameBBLargerCFG) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+define i32 @f1(i32 %a) {
+entry:
+  %i = icmp slt i32 %a, 0
+  br i1 %i, label %if.then, label %if.else
+if.then:
+  %b = call i32 @f2(i32 %a)
+  %c1 = add i32 %b, 2
+  br label %end
+if.else:
+  %c2 = add i32 %a, 1
+  br label %end
+end:
+  %ret = phi i32 [%c1, %if.then],[%c2, %if.else]
+  ret i32 %ret
+}
+
+define i32 @f2(i32 %a) {
+  %b = add i32 %a, 1
+  ret i32 %b
+}
+)IR");
+
+  Function *F1 = M->getFunction("f1");
+  CallBase* CB = findCall(*F1, "b");
+  EXPECT_NE(CB, nullptr);
+
+  FunctionPropertiesInfo ExpectedInitial;
+  ExpectedInitial.BasicBlockCount = 4;
+  ExpectedInitial.BlocksReachedFromConditionalInstruction = 2;
+  ExpectedInitial.TotalInstructionCount = 9;
+  ExpectedInitial.Uses = 1;
+  ExpectedInitial.DirectCallsToDefinedFunctions = 1;
+
+  FunctionPropertiesInfo ExpectedFinal = ExpectedInitial;
+  ExpectedFinal.DirectCallsToDefinedFunctions = 0;
+
+  auto FPI = buildFPI(*F1);
+  EXPECT_EQ(FPI, ExpectedInitial);
+
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  FPU.finish(*LI);
+  EXPECT_EQ(FPI, ExpectedFinal);
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, InlineSameBBLoops) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+define i32 @f1(i32 %a) {
+entry:
+  %i = icmp slt i32 %a, 0
+  br i1 %i, label %if.then, label %if.else
+if.then:
+  %b = call i32 @f2(i32 %a)
+  %c1 = add i32 %b, 2
+  br label %end
+if.else:
+  %c2 = add i32 %a, 1
+  br label %end
+end:
+  %ret = phi i32 [%c1, %if.then],[%c2, %if.else]
+  ret i32 %ret
+}
+
+define i32 @f2(i32 %a) {
+entry:
+  br label %loop
+loop:
+  %indvar = phi i32 [%indvar.next, %loop], [0, %entry]
+  %b = add i32 %a, %indvar
+  %indvar.next = add i32 %indvar, 1
+  %cond = icmp slt i32 %indvar.next, %a
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret i32 %b
+}
+)IR");
+
+  Function *F1 = M->getFunction("f1");
+  CallBase* CB = findCall(*F1, "b");
+  EXPECT_NE(CB, nullptr);
+
+  FunctionPropertiesInfo ExpectedInitial;
+  ExpectedInitial.BasicBlockCount = 4;
+  ExpectedInitial.BlocksReachedFromConditionalInstruction = 2;
+  ExpectedInitial.TotalInstructionCount = 9;
+  ExpectedInitial.Uses = 1;
+  ExpectedInitial.DirectCallsToDefinedFunctions = 1;
+
+  FunctionPropertiesInfo ExpectedFinal;
+  ExpectedFinal.BasicBlockCount = 6;
+  ExpectedFinal.BlocksReachedFromConditionalInstruction = 4;
+  ExpectedFinal.Uses = 1;
+  ExpectedFinal.MaxLoopDepth = 1;
+  ExpectedFinal.TopLevelLoopCount = 1;
+  ExpectedFinal.TotalInstructionCount = 14;
+
+  auto FPI = buildFPI(*F1);
+  EXPECT_EQ(FPI, ExpectedInitial);
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  DominatorTree DTNew(*F1);
+  LoopInfo LINew(DTNew);
+  FPU.finish(LINew);
+  EXPECT_EQ(FPI, ExpectedFinal);
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, InvokeSimple) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+declare void @might_throw()
+
+define internal void @callee() {
+entry:
+  call void @might_throw()
+  ret void
+}
+
+define i32 @caller() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  invoke void @callee()
+      to label %cont unwind label %exc
+
+cont:
+  ret i32 0
+
+exc:
+  %exn = landingpad {i8*, i32}
+         cleanup
+  ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+)IR");
+
+  Function *F1 = M->getFunction("caller");
+  CallBase* CB = findCall(*F1);
+  EXPECT_NE(CB, nullptr);
+
+  auto FPI = buildFPI(*F1);
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  DominatorTree DTNew(*F1);
+  LoopInfo LINew(DTNew);
+  FPU.finish(LINew);
+  EXPECT_EQ(static_cast<size_t>(FPI.BasicBlockCount),
+            F1->getBasicBlockList().size());
+  EXPECT_EQ(static_cast<size_t>(FPI.TotalInstructionCount),
+            F1->getInstructionCount());
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, InvokeUnreachableHandler) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+declare void @might_throw()
+
+define internal i32 @callee() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  invoke void @might_throw()
+      to label %cont unwind label %exc
+
+cont:
+  ret i32 0
+
+exc:
+  %exn = landingpad {i8*, i32}
+         cleanup
+  resume { i8*, i32 } %exn
+}
+
+define i32 @caller() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  %X = invoke i32 @callee()
+           to label %cont unwind label %Handler
+
+cont:
+  ret i32 %X
+
+Handler:
+  %exn = landingpad {i8*, i32}
+         cleanup
+  ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+)IR");
+
+  Function *F1 = M->getFunction("caller");
+  CallBase* CB = findCall(*F1);
+  EXPECT_NE(CB, nullptr);
+
+  auto FPI = buildFPI(*F1);
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  DominatorTree DTNew(*F1);
+  LoopInfo LINew(DTNew);
+  FPU.finish(LINew);
+  EXPECT_EQ(static_cast<size_t>(FPI.BasicBlockCount),
+            F1->getBasicBlockList().size() - 1);
+  EXPECT_EQ(static_cast<size_t>(FPI.TotalInstructionCount),
+            F1->getInstructionCount() - 2);
+  EXPECT_EQ(FPI, FunctionPropertiesInfo::getFunctionPropertiesInfo(*F1, LINew));
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, Rethrow) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+declare void @might_throw()
+
+define internal i32 @callee() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  invoke void @might_throw()
+      to label %cont unwind label %exc
+
+cont:
+  ret i32 0
+
+exc:
+  %exn = landingpad {i8*, i32}
+         cleanup
+  resume { i8*, i32 } %exn
+}
+
+define i32 @caller() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+  %X = invoke i32 @callee()
+           to label %cont unwind label %Handler
+
+cont:
+  ret i32 %X
+
+Handler:
+  %exn = landingpad {i8*, i32}
+         cleanup
+  ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+)IR");
+
+  Function *F1 = M->getFunction("caller");
+  CallBase* CB = findCall(*F1);
+  EXPECT_NE(CB, nullptr);
+
+  auto FPI = buildFPI(*F1);
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  DominatorTree DTNew(*F1);
+  LoopInfo LINew(DTNew);
+  FPU.finish(LINew);
+  EXPECT_EQ(static_cast<size_t>(FPI.BasicBlockCount),
+            F1->getBasicBlockList().size() - 1);
+  EXPECT_EQ(static_cast<size_t>(FPI.TotalInstructionCount),
+            F1->getInstructionCount() - 2);
+  EXPECT_EQ(FPI, FunctionPropertiesInfo::getFunctionPropertiesInfo(*F1, LINew));
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, LPadChanges) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+declare void @external_func()
+
+ at exception_type1 = external global i8
+ at exception_type2 = external global i8
+
+
+define internal void @inner() personality i8* null {
+  invoke void @external_func()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32
+      catch i8* @exception_type1
+  resume i32 %lp
+}
+
+define void @outer() personality i8* null {
+  invoke void @inner()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32
+      cleanup
+      catch i8* @exception_type2
+  resume i32 %lp
+}
+
+)IR");
+
+  Function *F1 = M->getFunction("outer");
+  CallBase* CB = findCall(*F1);
+  EXPECT_NE(CB, nullptr);
+
+  auto FPI = buildFPI(*F1);
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  DominatorTree DTNew(*F1);
+  LoopInfo LINew(DTNew);
+  FPU.finish(LINew);
+  EXPECT_EQ(static_cast<size_t>(FPI.BasicBlockCount),
+            F1->getBasicBlockList().size() - 1);
+  EXPECT_EQ(static_cast<size_t>(FPI.TotalInstructionCount),
+            F1->getInstructionCount() - 2);
+  EXPECT_EQ(FPI, FunctionPropertiesInfo::getFunctionPropertiesInfo(*F1, LINew));
+}
+
+TEST_F(FunctionPropertiesAnalysisTest, LPadChangesConditional) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = makeLLVMModule(C,
+                                             R"IR(
+declare void @external_func()
+
+ at exception_type1 = external global i8
+ at exception_type2 = external global i8
+
+
+define internal void @inner() personality i8* null {
+  invoke void @external_func()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32
+      catch i8* @exception_type1
+  resume i32 %lp
+}
+
+define void @outer(i32 %a) personality i8* null {
+entry:
+  %i = icmp slt i32 %a, 0
+  br i1 %i, label %if.then, label %cont
+if.then:
+  invoke void @inner()
+      to label %cont unwind label %lpad
+cont:
+  ret void
+lpad:
+  %lp = landingpad i32
+      cleanup
+      catch i8* @exception_type2
+  resume i32 %lp
+}
+
+)IR");
+
+  Function *F1 = M->getFunction("outer");
+  CallBase* CB = findCall(*F1);
+  EXPECT_NE(CB, nullptr);
+
+  auto FPI = buildFPI(*F1);
+  FunctionPropertiesUpdater FPU(FPI, *CB);
+  InlineFunctionInfo IFI;
+  auto IR = llvm::InlineFunction(*CB, IFI);
+  EXPECT_TRUE(IR.isSuccess());
+  DominatorTree DTNew(*F1);
+  LoopInfo LINew(DTNew);
+  FPU.finish(LINew);
+  EXPECT_EQ(static_cast<size_t>(FPI.BasicBlockCount),
+            F1->getBasicBlockList().size() - 1);
+  EXPECT_EQ(static_cast<size_t>(FPI.TotalInstructionCount),
+            F1->getInstructionCount() - 2);
+  EXPECT_EQ(FPI, FunctionPropertiesInfo::getFunctionPropertiesInfo(*F1, LINew));
+}
+
 } // end anonymous namespace