[llvm] r348639 - [HotColdSplitting] Outline more than once per function

Vedant Kumar via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 7 12:23:52 PST 2018


Author: vedantk
Date: Fri Dec  7 12:23:52 2018
New Revision: 348639

URL: http://llvm.org/viewvc/llvm-project?rev=348639&view=rev
Log:
[HotColdSplitting] Outline more than once per function

Algorithm: Identify maximal cold regions and put them in a worklist. If
a candidate region overlaps with another, discard it. While the worklist
is non-empty, remove a single-entry sub-region from the worklist and attempt
to outline it. By the non-overlap property, this should not invalidate
parts of the domtree pertaining to other outlining regions.

Testing: LNT results on X86 are clean. With test-suite + externals, llvm
outlines 134KB pre-patch, and 352KB post-patch (~2.6x as much). The file
483.xalancbmk/src/Constants.cpp stands out as an extreme case where llvm
outlines over 100 times in some functions (mostly EH paths). There was
not a significant performance impact pre vs. post-patch.

Differential Revision: https://reviews.llvm.org/D53887

Added:
    llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll
    llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll
    llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll
    llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll
    llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
    llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll
    llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll
    llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll
    llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
Modified:
    llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
    llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll

Modified: llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp?rev=348639&r1=348638&r2=348639&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp Fri Dec  7 12:23:52 2018
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -135,11 +136,14 @@ static bool mayExtractBlock(const BasicB
   return !BB.hasAddressTaken();
 }
 
-/// Check whether \p BB is profitable to outline (i.e. its code size cost meets
-/// the threshold set in \p MinOutliningThreshold).
-static bool isProfitableToOutline(const BasicBlock &BB,
+/// Check whether \p Region is profitable to outline.
+static bool isProfitableToOutline(const BlockSequence &Region,
                                   TargetTransformInfo &TTI) {
+  if (Region.size() > 1)
+    return true;
+
   int Cost = 0;
+  const BasicBlock &BB = *Region[0];
   for (const Instruction &I : BB) {
     if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
       continue;
@@ -152,151 +156,16 @@ static bool isProfitableToOutline(const
   return false;
 }
 
-/// Identify the maximal region of cold blocks which includes \p SinkBB.
-///
-/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
-/// blocks dominated by \p SinkBB. Exclude all other blocks, and blocks which
-/// cannot be outlined.
-///
-/// Return an empty sequence if the cold region is too small to outline, or if
-/// the cold region has no warm predecessors.
-static BlockSequence findMaximalColdRegion(BasicBlock &SinkBB,
-                                           TargetTransformInfo &TTI,
-                                           DominatorTree &DT,
-                                           PostDomTree &PDT) {
-  // The maximal cold region.
-  BlockSequence ColdRegion = {};
-
-  // The ancestor farthest-away from SinkBB, and also post-dominated by it.
-  BasicBlock *MaxAncestor = &SinkBB;
-  unsigned MaxAncestorHeight = 0;
-
-  // Visit SinkBB's ancestors using inverse DFS.
-  auto PredIt = ++idf_begin(&SinkBB);
-  auto PredEnd = idf_end(&SinkBB);
-  while (PredIt != PredEnd) {
-    BasicBlock &PredBB = **PredIt;
-    bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
-
-    // If SinkBB does not post-dominate a predecessor, do not mark the
-    // predecessor (or any of its predecessors) cold.
-    if (!SinkPostDom || !mayExtractBlock(PredBB)) {
-      PredIt.skipChildren();
-      continue;
-    }
-
-    // Keep track of the post-dominated ancestor farthest away from the sink.
-    unsigned AncestorHeight = PredIt.getPathLength();
-    if (AncestorHeight > MaxAncestorHeight) {
-      MaxAncestor = &PredBB;
-      MaxAncestorHeight = AncestorHeight;
-    }
-
-    ColdRegion.push_back(&PredBB);
-    ++PredIt;
-  }
-
-  // CodeExtractor requires that all blocks to be extracted must be dominated
-  // by the first block to be extracted.
-  //
-  // To avoid spurious or repeated outlining, require that the max ancestor
-  // has a predecessor. By construction this predecessor is not in the cold
-  // region, i.e. its existence implies we don't outline the whole function.
-  //
-  // TODO: If MaxAncestor has no predecessors, we may be able to outline the
-  // second largest cold region that has a predecessor.
-  if (pred_empty(MaxAncestor) ||
-      MaxAncestor->getSinglePredecessor() == MaxAncestor)
-    return {};
-
-  // Filter out predecessors not dominated by the max ancestor.
-  //
-  // TODO: Blocks not dominated by the max ancestor could be extracted as
-  // other cold regions. Marking outlined calls as noreturn when appropriate
-  // and outlining more than once per function could achieve most of the win.
-  auto EraseIt = remove_if(ColdRegion, [&](BasicBlock *PredBB) {
-    return PredBB != MaxAncestor && !DT.dominates(MaxAncestor, PredBB);
-  });
-  ColdRegion.erase(EraseIt, ColdRegion.end());
-
-  // Add SinkBB to the cold region.
-  ColdRegion.push_back(&SinkBB);
-
-  // Ensure that the first extracted block is the max ancestor.
-  if (ColdRegion[0] != MaxAncestor) {
-    auto AncestorIt = find(ColdRegion, MaxAncestor);
-    *AncestorIt = ColdRegion[0];
-    ColdRegion[0] = MaxAncestor;
-  }
-
-  // Find all successors of SinkBB dominated by SinkBB using DFS.
-  auto SuccIt = ++df_begin(&SinkBB);
-  auto SuccEnd = df_end(&SinkBB);
-  while (SuccIt != SuccEnd) {
-    BasicBlock &SuccBB = **SuccIt;
-    bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
-
-    // If SinkBB does not dominate a successor, do not mark the successor (or
-    // any of its successors) cold.
-    if (!SinkDom || !mayExtractBlock(SuccBB)) {
-      SuccIt.skipChildren();
-      continue;
-    }
-
-    ColdRegion.push_back(&SuccBB);
-    ++SuccIt;
-  }
-
-  if (ColdRegion.size() == 1 && !isProfitableToOutline(*ColdRegion[0], TTI))
-    return {};
-
-  return ColdRegion;
-}
-
-/// Get the largest cold region in \p F.
-static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
-                                          BlockFrequencyInfo *BFI,
-                                          TargetTransformInfo &TTI,
-                                          DominatorTree &DT, PostDomTree &PDT) {
-  // Keep track of the largest cold region.
-  BlockSequence LargestColdRegion = {};
-
-  for (BasicBlock &BB : F) {
-    // Identify cold blocks.
-    if (!mayExtractBlock(BB))
-      continue;
-    bool Cold =
-        PSI.isColdBlock(&BB, BFI) || (EnableStaticAnalyis && unlikelyExecuted(BB));
-    if (!Cold)
-      continue;
-
-    LLVM_DEBUG({
-      dbgs() << "Found cold block:\n";
-      BB.dump();
-    });
-
-    // Find a maximal cold region we can outline.
-    BlockSequence ColdRegion = findMaximalColdRegion(BB, TTI, DT, PDT);
-    if (ColdRegion.empty()) {
-      LLVM_DEBUG(dbgs() << "  Skipping (block not profitable to extract)\n");
-      continue;
-    }
-
-    ++NumColdRegionsFound;
-
-    LLVM_DEBUG({
-      llvm::dbgs() << "Identified cold region with " << ColdRegion.size()
-                   << " blocks:\n";
-      for (BasicBlock *BB : ColdRegion)
-        BB->dump();
-    });
-
-    // TODO: Outline more than one region.
-    if (ColdRegion.size() > LargestColdRegion.size())
-      LargestColdRegion = std::move(ColdRegion);
+/// Mark \p F cold. Return true if it's changed.
+static bool markEntireFunctionCold(Function &F) {
+  assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+  bool Changed = false;
+  if (!F.hasFnAttribute(Attribute::MinSize)) {
+    F.addFnAttr(Attribute::MinSize);
+    Changed = true;
   }
-
-  return LargestColdRegion;
+  // TODO: Move this function into a cold section.
+  return Changed;
 }
 
 class HotColdSplitting {
@@ -310,6 +179,10 @@ public:
 
 private:
   bool shouldOutlineFrom(const Function &F) const;
+  bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+                          BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+                          DominatorTree &DT, PostDomTree &PDT,
+                          OptimizationRemarkEmitter &ORE);
   Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
                               BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
                               OptimizationRemarkEmitter &ORE, unsigned Count);
@@ -375,8 +248,6 @@ Function *HotColdSplitting::extractColdR
                                               OptimizationRemarkEmitter &ORE,
                                               unsigned Count) {
   assert(!Region.empty());
-  LLVM_DEBUG(for (auto *BB : Region)
-          llvm::dbgs() << "\nExtracting: " << *BB;);
 
   // TODO: Pass BFI and BPI to update profile information.
   CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
@@ -408,9 +279,7 @@ Function *HotColdSplitting::extractColdR
 
     // Try to make the outlined code as small as possible on the assumption
     // that it's cold.
-    assert(!OutF->hasFnAttribute(Attribute::OptimizeNone) &&
-           "An outlined function should never be marked optnone");
-    OutF->addFnAttr(Attribute::MinSize);
+    markEntireFunctionCold(*OutF);
 
     LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
     ORE.emit([&]() {
@@ -431,32 +300,285 @@ Function *HotColdSplitting::extractColdR
   return nullptr;
 }
 
+/// A pair of (basic block, score).
+using BlockTy = std::pair<BasicBlock *, unsigned>;
+
+/// A maximal outlining region. This contains all blocks post-dominated by a
+/// sink block, the sink block itself, and all blocks dominated by the sink.
+class OutliningRegion {
+  /// A list of (block, score) pairs. A block's score is non-zero iff it's a
+  /// viable sub-region entry point. Blocks with higher scores are better entry
+  /// points (i.e. they are more distant ancestors of the sink block).
+  SmallVector<BlockTy, 0> Blocks = {};
+
+  /// The suggested entry point into the region. If the region has multiple
+  /// entry points, all blocks within the region may not be reachable from this
+  /// entry point.
+  BasicBlock *SuggestedEntryPoint = nullptr;
+
+  /// Whether the entire function is cold.
+  bool EntireFunctionCold = false;
+
+  /// Whether or not \p BB could be the entry point of an extracted region.
+  static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
+
+  /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
+  static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
+    return isViableEntryPoint(BB) ? Score : 0;
+  }
+
+  /// These scores should be lower than the score for predecessor blocks,
+  /// because regions starting at predecessor blocks are typically larger.
+  static constexpr unsigned ScoreForSuccBlock = 1;
+  static constexpr unsigned ScoreForSinkBlock = 1;
+
+  OutliningRegion(const OutliningRegion &) = delete;
+  OutliningRegion &operator=(const OutliningRegion &) = delete;
+
+public:
+  OutliningRegion() = default;
+  OutliningRegion(OutliningRegion &&) = default;
+  OutliningRegion &operator=(OutliningRegion &&) = default;
+
+  static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
+                                const PostDomTree &PDT) {
+    OutliningRegion ColdRegion;
+
+    SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+
+    auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
+      RegionBlocks.insert(BB);
+      ColdRegion.Blocks.emplace_back(BB, Score);
+      assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+    };
+
+    // The ancestor farthest-away from SinkBB, and also post-dominated by it.
+    unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
+    ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+    unsigned BestScore = SinkScore;
+
+    // Visit SinkBB's ancestors using inverse DFS.
+    auto PredIt = ++idf_begin(&SinkBB);
+    auto PredEnd = idf_end(&SinkBB);
+    while (PredIt != PredEnd) {
+      BasicBlock &PredBB = **PredIt;
+      bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
+
+      // If the predecessor is cold and has no predecessors, the entire
+      // function must be cold.
+      if (SinkPostDom && pred_empty(&PredBB)) {
+        ColdRegion.EntireFunctionCold = true;
+        return ColdRegion;
+      }
+
+      // If SinkBB does not post-dominate a predecessor, do not mark the
+      // predecessor (or any of its predecessors) cold.
+      if (!SinkPostDom || !mayExtractBlock(PredBB)) {
+        PredIt.skipChildren();
+        continue;
+      }
+
+      // Keep track of the post-dominated ancestor farthest away from the sink.
+      // The path length is always >= 2, ensuring that predecessor blocks are
+      // considered as entry points before the sink block.
+      unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
+      if (PredScore > BestScore) {
+        ColdRegion.SuggestedEntryPoint = &PredBB;
+        BestScore = PredScore;
+      }
+
+      addBlockToRegion(&PredBB, PredScore);
+      ++PredIt;
+    }
+
+    // Add SinkBB to the cold region. It's considered as an entry point before
+    // any sink-successor blocks.
+    addBlockToRegion(&SinkBB, SinkScore);
+
+    // Find all successors of SinkBB dominated by SinkBB using DFS.
+    auto SuccIt = ++df_begin(&SinkBB);
+    auto SuccEnd = df_end(&SinkBB);
+    while (SuccIt != SuccEnd) {
+      BasicBlock &SuccBB = **SuccIt;
+      bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
+
+      // Don't allow the backwards & forwards DFSes to mark the same block.
+      bool DuplicateBlock = RegionBlocks.count(&SuccBB);
+
+      // If SinkBB does not dominate a successor, do not mark the successor (or
+      // any of its successors) cold.
+      if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
+        SuccIt.skipChildren();
+        continue;
+      }
+
+      unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
+      if (SuccScore > BestScore) {
+        ColdRegion.SuggestedEntryPoint = &SuccBB;
+        BestScore = SuccScore;
+      }
+
+      addBlockToRegion(&SuccBB, SuccScore);
+      ++SuccIt;
+    }
+
+    return ColdRegion;
+  }
+
+  /// Whether this region has nothing to extract.
+  bool empty() const { return !SuggestedEntryPoint; }
+
+  /// The blocks in this region.
+  ArrayRef<std::pair<BasicBlock *, unsigned>> blocks() const { return Blocks; }
+
+  /// Whether the entire function containing this region is cold.
+  bool isEntireFunctionCold() const { return EntireFunctionCold; }
+
+  /// Remove a sub-region from this region and return it as a block sequence.
+  BlockSequence takeSingleEntrySubRegion(DominatorTree &DT) {
+    assert(!empty() && !isEntireFunctionCold() && "Nothing to extract");
+
+    // Remove blocks dominated by the suggested entry point from this region.
+    // During the removal, identify the next best entry point into the region.
+    // Ensure that the first extracted block is the suggested entry point.
+    BlockSequence SubRegion = {SuggestedEntryPoint};
+    BasicBlock *NextEntryPoint = nullptr;
+    unsigned NextScore = 0;
+    auto RegionEndIt = Blocks.end();
+    auto RegionStartIt = remove_if(Blocks, [&](const BlockTy &Block) {
+      BasicBlock *BB = Block.first;
+      unsigned Score = Block.second;
+      bool InSubRegion =
+          BB == SuggestedEntryPoint || DT.dominates(SuggestedEntryPoint, BB);
+      if (!InSubRegion && Score > NextScore) {
+        NextEntryPoint = BB;
+        NextScore = Score;
+      }
+      if (InSubRegion && BB != SuggestedEntryPoint)
+        SubRegion.push_back(BB);
+      return InSubRegion;
+    });
+    Blocks.erase(RegionStartIt, RegionEndIt);
+
+    // Update the suggested entry point.
+    SuggestedEntryPoint = NextEntryPoint;
+
+    return SubRegion;
+  }
+};
+
+bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+                                          BlockFrequencyInfo *BFI,
+                                          TargetTransformInfo &TTI,
+                                          DominatorTree &DT, PostDomTree &PDT,
+                                          OptimizationRemarkEmitter &ORE) {
+  bool Changed = false;
+
+  // The set of cold blocks.
+  SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+
+  // The worklist of non-intersecting regions left to outline.
+  SmallVector<OutliningRegion, 2> OutliningWorklist;
+
+  // Set up an RPO traversal. Experimentally, this performs better (outlines
+  // more) than a PO traversal, because we prevent region overlap by keeping
+  // the first region to contain a block.
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+
+  // Find all cold regions.
+  for (BasicBlock *BB : RPOT) {
+    // Skip blocks which can't be outlined.
+    if (!mayExtractBlock(*BB))
+      continue;
+
+    // This block is already part of some outlining region.
+    if (ColdBlocks.count(BB))
+      continue;
+
+    bool Cold = PSI.isColdBlock(BB, BFI) ||
+                (EnableStaticAnalyis && unlikelyExecuted(*BB));
+    if (!Cold)
+      continue;
+
+    LLVM_DEBUG({
+      dbgs() << "Found a cold block:\n";
+      BB->dump();
+    });
+
+    auto Region = OutliningRegion::create(*BB, DT, PDT);
+    if (Region.empty())
+      continue;
+
+    if (Region.isEntireFunctionCold()) {
+      LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+      return markEntireFunctionCold(F);
+    }
+
+    // If this outlining region intersects with another, drop the new region.
+    //
+    // TODO: It's theoretically possible to outline more by only keeping the
+    // largest region which contains a block, but the extra bookkeeping to do
+    // this is tricky/expensive.
+    bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+      return !ColdBlocks.insert(Block.first).second;
+    });
+    if (RegionsOverlap)
+      continue;
+
+    OutliningWorklist.emplace_back(std::move(Region));
+    ++NumColdRegionsFound;
+  }
+
+  // Outline single-entry cold regions, splitting up larger regions as needed.
+  unsigned OutlinedFunctionID = 1;
+  while (!OutliningWorklist.empty()) {
+    OutliningRegion Region = OutliningWorklist.pop_back_val();
+    assert(!Region.empty() && "Empty outlining region in worklist");
+    do {
+      BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
+      if (!isProfitableToOutline(SubRegion, TTI)) {
+        LLVM_DEBUG({
+          dbgs() << "Skipping outlining; not profitable to outline\n";
+          SubRegion[0]->dump();
+        });
+        continue;
+      }
+
+      LLVM_DEBUG({
+        dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
+        for (BasicBlock *BB : SubRegion)
+          BB->dump();
+      });
+
+      Function *Outlined =
+          extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+      if (Outlined) {
+        ++OutlinedFunctionID;
+        OutlinedFunctions.insert(Outlined);
+        Changed = true;
+      }
+    } while (!Region.empty());
+  }
+
+  return Changed;
+}
+
 bool HotColdSplitting::run(Module &M) {
   bool Changed = false;
+  OutlinedFunctions.clear();
   for (auto &F : M) {
     if (!shouldOutlineFrom(F)) {
-      LLVM_DEBUG(llvm::dbgs() << "Not outlining in " << F.getName() << "\n");
+      LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
       continue;
     }
-
     LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
     DominatorTree DT(F);
     PostDomTree PDT(F);
     PDT.recalculate(F);
     BlockFrequencyInfo *BFI = GetBFI(F);
     TargetTransformInfo &TTI = GetTTI(F);
-
-    BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, TTI, DT, PDT);
-    if (ColdRegion.empty())
-      continue;
-
     OptimizationRemarkEmitter &ORE = (*GetORE)(F);
-    Function *Outlined =
-        extractColdRegion(ColdRegion, DT, BFI, TTI, ORE, /*Count=*/1);
-    if (Outlined) {
-      OutlinedFunctions.insert(Outlined);
-      Changed = true;
-    }
+    Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
   }
   return Changed;
 }

Added: llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,39 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: landingpad
+; CHECK: sideeffect(i32 2)
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: sideeffect(i32 0)
+; CHECK: sideeffect(i32 1)
+; CHECK: sink
+
+define void @foo(i32 %cond) personality i8 0 {
+entry:
+  invoke void @llvm.donothing() to label %normal unwind label %exception
+
+exception:
+  ; Note: EH pads are not candidates for region entry points.
+  %cleanup = landingpad i8 cleanup
+  br label %continue_exception
+
+continue_exception:
+  call void @sideeffect(i32 0)
+  call void @sideeffect(i32 1)
+  call void @sink()
+  ret void
+
+normal:
+  call void @sideeffect(i32 2)
+  ret void
+}
+
+declare void @sideeffect(i32)
+
+declare void @sink() cold
+
+declare void @llvm.donothing() nounwind readnone

Added: llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,63 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: call {{.*}}@foo.cold.1(
+; CHECK: unreachable
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: switch i32 undef, label %sw.epilog.i
+define void @foo(i32 %QMM) {
+entry:
+  switch i32 %QMM, label %entry.if.end16_crit_edge [
+    i32 1, label %if.then
+  ]
+
+entry.if.end16_crit_edge:                         ; preds = %entry
+  br label %if.end16
+
+if.then:                                          ; preds = %entry
+  br i1 undef, label %cond.true.i.i, label %_ZN10StringView8popFrontEv.exit.i
+
+cond.true.i.i:                                    ; preds = %if.then
+  ret void
+
+_ZN10StringView8popFrontEv.exit.i:                ; preds = %if.then
+  switch i32 undef, label %sw.epilog.i [
+    i32 81, label %if.end16
+    i32 82, label %sw.bb4.i
+    i32 83, label %sw.bb8.i
+    i32 84, label %sw.bb12.i
+    i32 65, label %if.end16
+    i32 66, label %sw.bb20.i
+    i32 67, label %sw.bb24.i
+    i32 68, label %sw.bb28.i
+  ]
+
+sw.bb4.i:                                         ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+sw.bb8.i:                                         ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+sw.bb12.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+sw.bb20.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+sw.bb24.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+sw.bb28.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+sw.epilog.i:                                      ; preds = %_ZN10StringView8popFrontEv.exit.i
+  br label %if.end16
+
+if.end16:                                         ; preds = %sw.epilog.i, %sw.bb28.i, %sw.bb24.i, %sw.bb20.i, %sw.bb12.i, %sw.bb8.i, %sw.bb4.i, %_ZN10StringView8popFrontEv.exit.i, %_ZN10StringView8popFrontEv.exit.i, %entry.if.end16_crit_edge
+  %0 = phi i8 [ 0, %entry.if.end16_crit_edge ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 1, %sw.bb4.i ], [ 2, %sw.bb8.i ], [ 3, %sw.bb12.i ], [ 1, %sw.bb20.i ], [ 2, %sw.bb24.i ], [ 3, %sw.bb28.i ], [ 0, %sw.epilog.i ]
+  unreachable
+}

Added: llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,29 @@
+; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@fun
+; CHECK: call {{.*}}@fun.cold.1(
+define void @fun() {
+entry:
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  ; This will be marked by the inverse DFS on sink-predecessors.
+  br label %sink
+
+sink:
+  call void @sink()
+
+  ; Do not allow the forward-DFS on sink-successors to mark the block again.
+  br i1 undef, label %if.then, label %if.then.exit
+
+if.then.exit:
+  ret void
+
+if.else:
+  ret void
+}
+
+declare void @sink() cold

Added: llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,64 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+; Source:
+; 
+; extern __attribute__((cold)) void sink();
+; extern void sideeffect(int);
+; void foo(int cond1, int cond2) {
+;     if (cond1) {
+;         if (cond2) {
+;             sideeffect(0);
+;         } else {
+;             sideeffect(1);
+;         }
+;         sink();
+;     } else {
+;         sideeffect(2);
+;     }
+;     sink();
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK: define {{.*}}@_Z3fooii{{.*}}#[[outlined_func_attr:[0-9]+]]
+; CHECK-NOT: _Z3fooii.cold
+; CHECK: attributes #[[outlined_func_attr]] = { {{.*}}minsize
+define void @_Z3fooii(i32, i32) {
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 %0, i32* %3, align 4
+  store i32 %1, i32* %4, align 4
+  %5 = load i32, i32* %3, align 4
+  %6 = icmp ne i32 %5, 0
+  br i1 %6, label %7, label %13
+
+; <label>:7:                                      ; preds = %2
+  %8 = load i32, i32* %4, align 4
+  %9 = icmp ne i32 %8, 0
+  br i1 %9, label %10, label %11
+
+; <label>:10:                                     ; preds = %7
+  call void @_Z10sideeffecti(i32 0)
+  br label %12
+
+; <label>:11:                                     ; preds = %7
+  call void @_Z10sideeffecti(i32 1)
+  br label %12
+
+; <label>:12:                                     ; preds = %11, %10
+  call void @_Z4sinkv() #3
+  br label %14
+
+; <label>:13:                                     ; preds = %2
+  call void @_Z10sideeffecti(i32 2)
+  br label %14
+
+; <label>:14:                                     ; preds = %13, %12
+  call void @_Z4sinkv() #3
+  ret void
+}
+
+declare void @_Z10sideeffecti(i32)
+
+declare void @_Z4sinkv() cold

Added: llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,57 @@
+; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s
+
+; CHECK-LABEL: define {{.*}}@fun
+; CHECK: call {{.*}}@fun.cold.2(
+; CHECK-NEXT: ret void
+; CHECK: call {{.*}}@fun.cold.1(
+; CHECK-NEXT: ret void
+define void @fun() {
+entry:
+  br i1 undef, label %A.then, label %A.else
+
+A.else:
+  br label %A.then4
+
+A.then4:
+  br i1 undef, label %A.then5, label %A.end
+
+A.then5:
+  br label %A.cleanup
+
+A.end:
+  br label %A.cleanup
+
+A.cleanup:
+  %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
+  unreachable
+
+A.then:
+  br i1 undef, label %B.then, label %B.else
+
+B.then:
+  ret void
+
+B.else:
+  br label %B.then4
+
+B.then4:
+  br i1 undef, label %B.then5, label %B.end
+
+B.then5:
+  br label %B.cleanup
+
+B.end:
+  br label %B.cleanup
+
+B.cleanup:
+  %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
+  unreachable
+}
+
+; CHECK-LABEL: define {{.*}}@fun.cold.1(
+; CHECK: %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
+; CHECK-NEXT: unreachable
+
+; CHECK-LABEL: define {{.*}}@fun.cold.2(
+; CHECK: %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
+; CHECK-NEXT: unreachable

Added: llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,81 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+; Source:
+;
+; extern __attribute__((cold)) void sink();
+; extern void sideeffect(int);
+; void foo(int cond1, int cond2) {
+;     while (true) {
+;         if (cond1) {
+;             sideeffect(0); // This is cold (it reaches sink()).
+;             break;
+;         }
+;         if (cond2) {
+;             sideeffect(1); // This is cold (it reaches sink()).
+;             break;
+;         }
+;         sideeffect(2);
+;         return;
+;     }
+;     sink();
+;     sideeffect(3);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.1
+; CHECK: call void @_Z10sideeffecti(i32 1)
+; CHECK: call void @_Z10sideeffecti(i32 11)
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.2
+; CHECK: call void @_Z10sideeffecti(i32 0)
+; CHECK: call void @_Z10sideeffecti(i32 10)
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.3
+; CHECK: call void @_Z4sinkv
+; CHECK: call void @_Z10sideeffecti(i32 3)
+
+define void @_Z3fooii(i32, i32) {
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 %0, i32* %3, align 4
+  store i32 %1, i32* %4, align 4
+  br label %5
+
+; <label>:5:                                      ; preds = %2
+  %6 = load i32, i32* %3, align 4
+  %7 = icmp ne i32 %6, 0
+  br i1 %7, label %8, label %9
+
+; <label>:8:                                      ; preds = %5
+  call void @_Z10sideeffecti(i32 0)
+  call void @_Z10sideeffecti(i32 10)
+  br label %14
+
+; <label>:9:                                      ; preds = %5
+  %10 = load i32, i32* %4, align 4
+  %11 = icmp ne i32 %10, 0
+  br i1 %11, label %12, label %13
+
+; <label>:12:                                     ; preds = %9
+  call void @_Z10sideeffecti(i32 1)
+  call void @_Z10sideeffecti(i32 11)
+  br label %14
+
+; <label>:13:                                     ; preds = %9
+  call void @_Z10sideeffecti(i32 2)
+  br label %15
+
+; <label>:14:                                     ; preds = %12, %8
+  call void @_Z4sinkv() #3
+  call void @_Z10sideeffecti(i32 3)
+  br label %15
+
+; <label>:15:                                     ; preds = %14, %13
+  ret void
+}
+
+declare void @_Z10sideeffecti(i32)
+
+declare void @_Z4sinkv() cold

Modified: llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll?rev=348639&r1=348638&r2=348639&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll (original)
+++ llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll Fri Dec  7 12:23:52 2018
@@ -55,10 +55,59 @@ if.end:
   ret void
 }
 
+; This is the same as @foo, but the while loop comes after the sink block.
+; CHECK-LABEL: define {{.*}}@while_loop_after_sink(
+; CHECK: br i1 {{.*}}, label %if.end, label %codeRepl
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT: call void @while_loop_after_sink.cold.1
+; CHECK-LABEL: if.end:
+; CHECK: call void @sideeffect(i32 1)
+define void @while_loop_after_sink(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %sink
+
+sink:
+  tail call void (...) @sink()
+  br label %while.cond.preheader
+
+while.cond.preheader:
+  %cmp3 = icmp sgt i32 %cond, 10
+  br i1 %cmp3, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %while.cond.preheader
+  br label %while.body
+
+while.body:                                       ; preds = %while.body.preheader, %while.body
+  %cond.addr.04 = phi i32 [ %dec, %while.body ], [ %cond, %while.body.preheader ]
+  %dec = add nsw i32 %cond.addr.04, -1
+  tail call void @sideeffect(i32 0) #3
+  %cmp = icmp sgt i32 %dec, 10
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %while.cond.preheader
+  ret void
+
+if.end:                                           ; preds = %entry
+  tail call void @sideeffect(i32 1)
+  ret void
+}
+
 ; CHECK-LABEL: define {{.*}}@foo.cold.1
 ; CHECK: phi i32
 ; CHECK-NEXT: add nsw i32
 ; CHECK-NEXT: call {{.*}}@sideeffect
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+
+; CHECK-LABEL: define {{.*}}@while_loop_after_sink.cold.1
+; CHECK: call {{.*}}@sink
+; CHECK: phi i32
+; CHECK-NEXT: add nsw i32
+; CHECK-NEXT: call {{.*}}@sideeffect
 ; CHECK-NEXT: icmp
 ; CHECK-NEXT: br
 

Added: llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,35 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: call {{.*}}@sink
+; CHECK: %p.ce = phi i32 [ 1, %coldbb ], [ 3, %coldbb2 ]
+; CHECK-NEXT: store i32 %p.ce, i32* %p.ce.out 
+
+define void @foo(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+  call void @sink()
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+  br label %if.end
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [3, %coldbb2]
+  ret void
+}
+
+declare void @sink() cold
+
+declare void @sideeffect()

Added: llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,65 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+; Source:
+;
+; __attribute__((cold)) extern void sink(int);
+; extern void sideeffect(int);
+; void foo(int cond1, int cond2) {
+;     if (cond1) {
+;         if (cond2) { // This is the first cold region we visit.
+;             sideeffect(0);
+;             sideeffect(10);
+;             sink(0);
+;         }
+;
+;         // There's a larger, overlapping cold region here. But we ignore it.
+;         // This could be improved.
+;         sideeffect(1);
+;         sideeffect(11);
+;         sink(1);
+;     }
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii
+; CHECK: call {{.*}}@_Z3fooii.cold.1
+; CHECK-NOT: _Z3fooii.cold
+define void @_Z3fooii(i32, i32) {
+  %3 = alloca i32, align 4
+  %4 = alloca i32, align 4
+  store i32 %0, i32* %3, align 4
+  store i32 %1, i32* %4, align 4
+  %5 = load i32, i32* %3, align 4
+  %6 = icmp ne i32 %5, 0
+  br i1 %6, label %7, label %12
+
+; <label>:7:                                      ; preds = %2
+  %8 = load i32, i32* %4, align 4
+  %9 = icmp ne i32 %8, 0
+  br i1 %9, label %10, label %11
+
+; <label>:10:                                     ; preds = %7
+  call void @_Z10sideeffecti(i32 0)
+  call void @_Z10sideeffecti(i32 10)
+  call void @_Z4sinki(i32 0) #3
+  br label %11
+
+; <label>:11:                                     ; preds = %10, %7
+  call void @_Z10sideeffecti(i32 1)
+  call void @_Z10sideeffecti(i32 11)
+  call void @_Z4sinki(i32 1) #3
+  br label %12
+
+; <label>:12:                                     ; preds = %11, %2
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.1
+; CHECK: call void @_Z10sideeffecti(i32 0)
+; CHECK: call void @_Z10sideeffecti(i32 10)
+
+declare void @_Z10sideeffecti(i32)
+
+declare void @_Z4sinki(i32) cold

Added: llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll Fri Dec  7 12:23:52 2018
@@ -0,0 +1,56 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_same_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_same_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+define void @exit_block_with_same_incoming_vals(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+  call void @sink()
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+  %p2 = phi i32 [0, %coldbb], [1, %coldbb2]
+  br i1 undef, label %if.end, label %coldbb2
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [1, %coldbb2]
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_distinct_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_distinct_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+define void @exit_block_with_distinct_incoming_vals(i32 %cond) {
+entry:
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+  call void @sink()
+  call void @sideeffect()
+  call void @sideeffect()
+  br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+  %p2 = phi i32 [0, %coldbb], [1, %coldbb2]
+  br i1 undef, label %if.end, label %coldbb2
+
+if.end:
+  %p = phi i32 [0, %entry], [1, %coldbb], [2, %coldbb2]
+  ret void
+}
+
+declare void @sink() cold
+
+declare void @sideeffect()




More information about the llvm-commits mailing list