[llvm] r348639 - [HotColdSplitting] Outline more than once per function
Vedant Kumar via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 7 12:23:52 PST 2018
Author: vedantk
Date: Fri Dec 7 12:23:52 2018
New Revision: 348639
URL: http://llvm.org/viewvc/llvm-project?rev=348639&view=rev
Log:
[HotColdSplitting] Outline more than once per function
Algorithm: Identify maximal cold regions and put them in a worklist. If
a candidate region overlaps with another, discard it. While the worklist
is non-empty, remove a single-entry sub-region from the worklist and attempt
to outline it. By the non-overlap property, this should not invalidate
parts of the domtree pertaining to other outlining regions.
Testing: LNT results on X86 are clean. With test-suite + externals, llvm
outlines 134KB pre-patch, and 352KB post-patch (+ ~2.6x). The file
483.xalancbmk/src/Constants.cpp stands out as an extreme case where llvm
outlines over 100 times in some functions (mostly EH paths). There was
not a significant performance impact pre vs. post-patch.
Differential Revision: https://reviews.llvm.org/D53887
Added:
llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll
llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll
llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll
llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll
llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll
llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll
llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll
llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
Modified:
llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll
Modified: llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp?rev=348639&r1=348638&r2=348639&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp Fri Dec 7 12:23:52 2018
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -135,11 +136,14 @@ static bool mayExtractBlock(const BasicB
return !BB.hasAddressTaken();
}
-/// Check whether \p BB is profitable to outline (i.e. its code size cost meets
-/// the threshold set in \p MinOutliningThreshold).
-static bool isProfitableToOutline(const BasicBlock &BB,
+/// Check whether \p Region is profitable to outline.
+static bool isProfitableToOutline(const BlockSequence &Region,
TargetTransformInfo &TTI) {
+ if (Region.size() > 1)
+ return true;
+
int Cost = 0;
+ const BasicBlock &BB = *Region[0];
for (const Instruction &I : BB) {
if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
continue;
@@ -152,151 +156,16 @@ static bool isProfitableToOutline(const
return false;
}
-/// Identify the maximal region of cold blocks which includes \p SinkBB.
-///
-/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
-/// blocks dominated by \p SinkBB. Exclude all other blocks, and blocks which
-/// cannot be outlined.
-///
-/// Return an empty sequence if the cold region is too small to outline, or if
-/// the cold region has no warm predecessors.
-static BlockSequence findMaximalColdRegion(BasicBlock &SinkBB,
- TargetTransformInfo &TTI,
- DominatorTree &DT,
- PostDomTree &PDT) {
- // The maximal cold region.
- BlockSequence ColdRegion = {};
-
- // The ancestor farthest-away from SinkBB, and also post-dominated by it.
- BasicBlock *MaxAncestor = &SinkBB;
- unsigned MaxAncestorHeight = 0;
-
- // Visit SinkBB's ancestors using inverse DFS.
- auto PredIt = ++idf_begin(&SinkBB);
- auto PredEnd = idf_end(&SinkBB);
- while (PredIt != PredEnd) {
- BasicBlock &PredBB = **PredIt;
- bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
-
- // If SinkBB does not post-dominate a predecessor, do not mark the
- // predecessor (or any of its predecessors) cold.
- if (!SinkPostDom || !mayExtractBlock(PredBB)) {
- PredIt.skipChildren();
- continue;
- }
-
- // Keep track of the post-dominated ancestor farthest away from the sink.
- unsigned AncestorHeight = PredIt.getPathLength();
- if (AncestorHeight > MaxAncestorHeight) {
- MaxAncestor = &PredBB;
- MaxAncestorHeight = AncestorHeight;
- }
-
- ColdRegion.push_back(&PredBB);
- ++PredIt;
- }
-
- // CodeExtractor requires that all blocks to be extracted must be dominated
- // by the first block to be extracted.
- //
- // To avoid spurious or repeated outlining, require that the max ancestor
- // has a predecessor. By construction this predecessor is not in the cold
- // region, i.e. its existence implies we don't outline the whole function.
- //
- // TODO: If MaxAncestor has no predecessors, we may be able to outline the
- // second largest cold region that has a predecessor.
- if (pred_empty(MaxAncestor) ||
- MaxAncestor->getSinglePredecessor() == MaxAncestor)
- return {};
-
- // Filter out predecessors not dominated by the max ancestor.
- //
- // TODO: Blocks not dominated by the max ancestor could be extracted as
- // other cold regions. Marking outlined calls as noreturn when appropriate
- // and outlining more than once per function could achieve most of the win.
- auto EraseIt = remove_if(ColdRegion, [&](BasicBlock *PredBB) {
- return PredBB != MaxAncestor && !DT.dominates(MaxAncestor, PredBB);
- });
- ColdRegion.erase(EraseIt, ColdRegion.end());
-
- // Add SinkBB to the cold region.
- ColdRegion.push_back(&SinkBB);
-
- // Ensure that the first extracted block is the max ancestor.
- if (ColdRegion[0] != MaxAncestor) {
- auto AncestorIt = find(ColdRegion, MaxAncestor);
- *AncestorIt = ColdRegion[0];
- ColdRegion[0] = MaxAncestor;
- }
-
- // Find all successors of SinkBB dominated by SinkBB using DFS.
- auto SuccIt = ++df_begin(&SinkBB);
- auto SuccEnd = df_end(&SinkBB);
- while (SuccIt != SuccEnd) {
- BasicBlock &SuccBB = **SuccIt;
- bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
-
- // If SinkBB does not dominate a successor, do not mark the successor (or
- // any of its successors) cold.
- if (!SinkDom || !mayExtractBlock(SuccBB)) {
- SuccIt.skipChildren();
- continue;
- }
-
- ColdRegion.push_back(&SuccBB);
- ++SuccIt;
- }
-
- if (ColdRegion.size() == 1 && !isProfitableToOutline(*ColdRegion[0], TTI))
- return {};
-
- return ColdRegion;
-}
-
-/// Get the largest cold region in \p F.
-static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT) {
- // Keep track of the largest cold region.
- BlockSequence LargestColdRegion = {};
-
- for (BasicBlock &BB : F) {
- // Identify cold blocks.
- if (!mayExtractBlock(BB))
- continue;
- bool Cold =
- PSI.isColdBlock(&BB, BFI) || (EnableStaticAnalyis && unlikelyExecuted(BB));
- if (!Cold)
- continue;
-
- LLVM_DEBUG({
- dbgs() << "Found cold block:\n";
- BB.dump();
- });
-
- // Find a maximal cold region we can outline.
- BlockSequence ColdRegion = findMaximalColdRegion(BB, TTI, DT, PDT);
- if (ColdRegion.empty()) {
- LLVM_DEBUG(dbgs() << " Skipping (block not profitable to extract)\n");
- continue;
- }
-
- ++NumColdRegionsFound;
-
- LLVM_DEBUG({
- llvm::dbgs() << "Identified cold region with " << ColdRegion.size()
- << " blocks:\n";
- for (BasicBlock *BB : ColdRegion)
- BB->dump();
- });
-
- // TODO: Outline more than one region.
- if (ColdRegion.size() > LargestColdRegion.size())
- LargestColdRegion = std::move(ColdRegion);
+/// Mark \p F cold. Return true if it's changed.
+static bool markEntireFunctionCold(Function &F) {
+ assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+ bool Changed = false;
+ if (!F.hasFnAttribute(Attribute::MinSize)) {
+ F.addFnAttr(Attribute::MinSize);
+ Changed = true;
}
-
- return LargestColdRegion;
+ // TODO: Move this function into a cold section.
+ return Changed;
}
class HotColdSplitting {
@@ -310,6 +179,10 @@ public:
private:
bool shouldOutlineFrom(const Function &F) const;
+ bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDomTree &PDT,
+ OptimizationRemarkEmitter &ORE);
Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
OptimizationRemarkEmitter &ORE, unsigned Count);
@@ -375,8 +248,6 @@ Function *HotColdSplitting::extractColdR
OptimizationRemarkEmitter &ORE,
unsigned Count) {
assert(!Region.empty());
- LLVM_DEBUG(for (auto *BB : Region)
- llvm::dbgs() << "\nExtracting: " << *BB;);
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
@@ -408,9 +279,7 @@ Function *HotColdSplitting::extractColdR
// Try to make the outlined code as small as possible on the assumption
// that it's cold.
- assert(!OutF->hasFnAttribute(Attribute::OptimizeNone) &&
- "An outlined function should never be marked optnone");
- OutF->addFnAttr(Attribute::MinSize);
+ markEntireFunctionCold(*OutF);
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
ORE.emit([&]() {
@@ -431,32 +300,285 @@ Function *HotColdSplitting::extractColdR
return nullptr;
}
+/// A pair of (basic block, score).
+using BlockTy = std::pair<BasicBlock *, unsigned>;
+
+/// A maximal outlining region. This contains all blocks post-dominated by a
+/// sink block, the sink block itself, and all blocks dominated by the sink.
+class OutliningRegion {
+ /// A list of (block, score) pairs. A block's score is non-zero iff it's a
+ /// viable sub-region entry point. Blocks with higher scores are better entry
+ /// points (i.e. they are more distant ancestors of the sink block).
+ SmallVector<BlockTy, 0> Blocks = {};
+
+ /// The suggested entry point into the region. If the region has multiple
+ /// entry points, some blocks within the region may not be reachable from this
+ /// entry point.
+ BasicBlock *SuggestedEntryPoint = nullptr;
+
+ /// Whether the entire function is cold.
+ bool EntireFunctionCold = false;
+
+ /// Whether or not \p BB could be the entry point of an extracted region.
+ static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
+
+ /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
+ static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
+ return isViableEntryPoint(BB) ? Score : 0;
+ }
+
+ /// These scores should be lower than the score for predecessor blocks,
+ /// because regions starting at predecessor blocks are typically larger.
+ static constexpr unsigned ScoreForSuccBlock = 1;
+ static constexpr unsigned ScoreForSinkBlock = 1;
+
+ OutliningRegion(const OutliningRegion &) = delete;
+ OutliningRegion &operator=(const OutliningRegion &) = delete;
+
+public:
+ OutliningRegion() = default;
+ OutliningRegion(OutliningRegion &&) = default;
+ OutliningRegion &operator=(OutliningRegion &&) = default;
+
+ static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
+ const PostDomTree &PDT) {
+ OutliningRegion ColdRegion;
+
+ SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+
+ auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
+ RegionBlocks.insert(BB);
+ ColdRegion.Blocks.emplace_back(BB, Score);
+ assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+ };
+
+ // The ancestor farthest-away from SinkBB, and also post-dominated by it.
+ unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
+ ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+ unsigned BestScore = SinkScore;
+
+ // Visit SinkBB's ancestors using inverse DFS.
+ auto PredIt = ++idf_begin(&SinkBB);
+ auto PredEnd = idf_end(&SinkBB);
+ while (PredIt != PredEnd) {
+ BasicBlock &PredBB = **PredIt;
+ bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);
+
+ // If the predecessor is cold and has no predecessors, the entire
+ // function must be cold.
+ if (SinkPostDom && pred_empty(&PredBB)) {
+ ColdRegion.EntireFunctionCold = true;
+ return ColdRegion;
+ }
+
+ // If SinkBB does not post-dominate a predecessor, do not mark the
+ // predecessor (or any of its predecessors) cold.
+ if (!SinkPostDom || !mayExtractBlock(PredBB)) {
+ PredIt.skipChildren();
+ continue;
+ }
+
+ // Keep track of the post-dominated ancestor farthest away from the sink.
+ // The path length is always >= 2, ensuring that predecessor blocks are
+ // considered as entry points before the sink block.
+ unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
+ if (PredScore > BestScore) {
+ ColdRegion.SuggestedEntryPoint = &PredBB;
+ BestScore = PredScore;
+ }
+
+ addBlockToRegion(&PredBB, PredScore);
+ ++PredIt;
+ }
+
+ // Add SinkBB to the cold region. It's considered as an entry point before
+ // any sink-successor blocks.
+ addBlockToRegion(&SinkBB, SinkScore);
+
+ // Find all successors of SinkBB dominated by SinkBB using DFS.
+ auto SuccIt = ++df_begin(&SinkBB);
+ auto SuccEnd = df_end(&SinkBB);
+ while (SuccIt != SuccEnd) {
+ BasicBlock &SuccBB = **SuccIt;
+ bool SinkDom = DT.dominates(&SinkBB, &SuccBB);
+
+ // Don't allow the backwards & forwards DFSes to mark the same block.
+ bool DuplicateBlock = RegionBlocks.count(&SuccBB);
+
+ // If SinkBB does not dominate a successor, do not mark the successor (or
+ // any of its successors) cold.
+ if (DuplicateBlock || !SinkDom || !mayExtractBlock(SuccBB)) {
+ SuccIt.skipChildren();
+ continue;
+ }
+
+ unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
+ if (SuccScore > BestScore) {
+ ColdRegion.SuggestedEntryPoint = &SuccBB;
+ BestScore = SuccScore;
+ }
+
+ addBlockToRegion(&SuccBB, SuccScore);
+ ++SuccIt;
+ }
+
+ return ColdRegion;
+ }
+
+ /// Whether this region has nothing to extract.
+ bool empty() const { return !SuggestedEntryPoint; }
+
+ /// The blocks in this region.
+ ArrayRef<std::pair<BasicBlock *, unsigned>> blocks() const { return Blocks; }
+
+ /// Whether the entire function containing this region is cold.
+ bool isEntireFunctionCold() const { return EntireFunctionCold; }
+
+ /// Remove a sub-region from this region and return it as a block sequence.
+ BlockSequence takeSingleEntrySubRegion(DominatorTree &DT) {
+ assert(!empty() && !isEntireFunctionCold() && "Nothing to extract");
+
+ // Remove blocks dominated by the suggested entry point from this region.
+ // During the removal, identify the next best entry point into the region.
+ // Ensure that the first extracted block is the suggested entry point.
+ BlockSequence SubRegion = {SuggestedEntryPoint};
+ BasicBlock *NextEntryPoint = nullptr;
+ unsigned NextScore = 0;
+ auto RegionEndIt = Blocks.end();
+ auto RegionStartIt = remove_if(Blocks, [&](const BlockTy &Block) {
+ BasicBlock *BB = Block.first;
+ unsigned Score = Block.second;
+ bool InSubRegion =
+ BB == SuggestedEntryPoint || DT.dominates(SuggestedEntryPoint, BB);
+ if (!InSubRegion && Score > NextScore) {
+ NextEntryPoint = BB;
+ NextScore = Score;
+ }
+ if (InSubRegion && BB != SuggestedEntryPoint)
+ SubRegion.push_back(BB);
+ return InSubRegion;
+ });
+ Blocks.erase(RegionStartIt, RegionEndIt);
+
+ // Update the suggested entry point.
+ SuggestedEntryPoint = NextEntryPoint;
+
+ return SubRegion;
+ }
+};
+
+bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
+ BlockFrequencyInfo *BFI,
+ TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDomTree &PDT,
+ OptimizationRemarkEmitter &ORE) {
+ bool Changed = false;
+
+ // The set of cold blocks.
+ SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+
+ // The worklist of non-intersecting regions left to outline.
+ SmallVector<OutliningRegion, 2> OutliningWorklist;
+
+ // Set up an RPO traversal. Experimentally, this performs better (outlines
+ // more) than a PO traversal, because we prevent region overlap by keeping
+ // the first region to contain a block.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+
+ // Find all cold regions.
+ for (BasicBlock *BB : RPOT) {
+ // Skip blocks which can't be outlined.
+ if (!mayExtractBlock(*BB))
+ continue;
+
+ // This block is already part of some outlining region.
+ if (ColdBlocks.count(BB))
+ continue;
+
+ bool Cold = PSI.isColdBlock(BB, BFI) ||
+ (EnableStaticAnalyis && unlikelyExecuted(*BB));
+ if (!Cold)
+ continue;
+
+ LLVM_DEBUG({
+ dbgs() << "Found a cold block:\n";
+ BB->dump();
+ });
+
+ auto Region = OutliningRegion::create(*BB, DT, PDT);
+ if (Region.empty())
+ continue;
+
+ if (Region.isEntireFunctionCold()) {
+ LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+ return markEntireFunctionCold(F);
+ }
+
+ // If this outlining region intersects with another, drop the new region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping the
+ // largest region which contains a block, but the extra bookkeeping to do
+ // this is tricky/expensive.
+ bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+ return !ColdBlocks.insert(Block.first).second;
+ });
+ if (RegionsOverlap)
+ continue;
+
+ OutliningWorklist.emplace_back(std::move(Region));
+ ++NumColdRegionsFound;
+ }
+
+ // Outline single-entry cold regions, splitting up larger regions as needed.
+ unsigned OutlinedFunctionID = 1;
+ while (!OutliningWorklist.empty()) {
+ OutliningRegion Region = OutliningWorklist.pop_back_val();
+ assert(!Region.empty() && "Empty outlining region in worklist");
+ do {
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
+ if (!isProfitableToOutline(SubRegion, TTI)) {
+ LLVM_DEBUG({
+ dbgs() << "Skipping outlining; not profitable to outline\n";
+ SubRegion[0]->dump();
+ });
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
+ for (BasicBlock *BB : SubRegion)
+ BB->dump();
+ });
+
+ Function *Outlined =
+ extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+ if (Outlined) {
+ ++OutlinedFunctionID;
+ OutlinedFunctions.insert(Outlined);
+ Changed = true;
+ }
+ } while (!Region.empty());
+ }
+
+ return Changed;
+}
+
bool HotColdSplitting::run(Module &M) {
bool Changed = false;
+ OutlinedFunctions.clear();
for (auto &F : M) {
if (!shouldOutlineFrom(F)) {
- LLVM_DEBUG(llvm::dbgs() << "Not outlining in " << F.getName() << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
continue;
}
-
LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
DominatorTree DT(F);
PostDomTree PDT(F);
PDT.recalculate(F);
BlockFrequencyInfo *BFI = GetBFI(F);
TargetTransformInfo &TTI = GetTTI(F);
-
- BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, TTI, DT, PDT);
- if (ColdRegion.empty())
- continue;
-
OptimizationRemarkEmitter &ORE = (*GetORE)(F);
- Function *Outlined =
- extractColdRegion(ColdRegion, DT, BFI, TTI, ORE, /*Count=*/1);
- if (Outlined) {
- OutlinedFunctions.insert(Outlined);
- Changed = true;
- }
+ Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
}
return Changed;
}
Added: llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/eh-pads.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,39 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: landingpad
+; CHECK: sideeffect(i32 2)
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: sideeffect(i32 0)
+; CHECK: sideeffect(i32 1)
+; CHECK: sink
+
+define void @foo(i32 %cond) personality i8 0 {
+entry:
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+
+exception:
+ ; Note: EH pads are not candidates for region entry points.
+ %cleanup = landingpad i8 cleanup
+ br label %continue_exception
+
+continue_exception:
+ call void @sideeffect(i32 0)
+ call void @sideeffect(i32 1)
+ call void @sink()
+ ret void
+
+normal:
+ call void @sideeffect(i32 2)
+ ret void
+}
+
+declare void @sideeffect(i32)
+
+declare void @sink() cold
+
+declare void @llvm.donothing() nounwind readnone
Added: llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/extraction-subregion-breaks-phis.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,63 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: call {{.*}}@foo.cold.1(
+; CHECK: unreachable
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: switch i32 undef, label %sw.epilog.i
+define void @foo(i32 %QMM) {
+entry:
+ switch i32 %QMM, label %entry.if.end16_crit_edge [
+ i32 1, label %if.then
+ ]
+
+entry.if.end16_crit_edge: ; preds = %entry
+ br label %if.end16
+
+if.then: ; preds = %entry
+ br i1 undef, label %cond.true.i.i, label %_ZN10StringView8popFrontEv.exit.i
+
+cond.true.i.i: ; preds = %if.then
+ ret void
+
+_ZN10StringView8popFrontEv.exit.i: ; preds = %if.then
+ switch i32 undef, label %sw.epilog.i [
+ i32 81, label %if.end16
+ i32 82, label %sw.bb4.i
+ i32 83, label %sw.bb8.i
+ i32 84, label %sw.bb12.i
+ i32 65, label %if.end16
+ i32 66, label %sw.bb20.i
+ i32 67, label %sw.bb24.i
+ i32 68, label %sw.bb28.i
+ ]
+
+sw.bb4.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+sw.bb8.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+sw.bb12.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+sw.bb20.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+sw.bb24.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+sw.bb28.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+sw.epilog.i: ; preds = %_ZN10StringView8popFrontEv.exit.i
+ br label %if.end16
+
+if.end16: ; preds = %sw.epilog.i, %sw.bb28.i, %sw.bb24.i, %sw.bb20.i, %sw.bb12.i, %sw.bb8.i, %sw.bb4.i, %_ZN10StringView8popFrontEv.exit.i, %_ZN10StringView8popFrontEv.exit.i, %entry.if.end16_crit_edge
+ %0 = phi i8 [ 0, %entry.if.end16_crit_edge ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 1, %sw.bb4.i ], [ 2, %sw.bb8.i ], [ 3, %sw.bb12.i ], [ 1, %sw.bb20.i ], [ 2, %sw.bb24.i ], [ 3, %sw.bb28.i ], [ 0, %sw.epilog.i ]
+ unreachable
+}
Added: llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/forward-dfs-reaches-marked-block.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,29 @@
+; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@fun
+; CHECK: call {{.*}}@fun.cold.1(
+define void @fun() {
+entry:
+ br i1 undef, label %if.then, label %if.else
+
+if.then:
+ ; This will be marked by the inverse DFS on sink-predecessors.
+ br label %sink
+
+sink:
+ call void @sink()
+
+ ; Do not allow the forward-DFS on sink-successors to mark the block again.
+ br i1 undef, label %if.then, label %if.then.exit
+
+if.then.exit:
+ ret void
+
+if.else:
+ ret void
+}
+
+declare void @sink() cold
Added: llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/mark-the-whole-func-cold.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,64 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+; Source:
+;
+; extern __attribute__((cold)) void sink();
+; extern void sideeffect(int);
+; void foo(int cond1, int cond2) {
+; if (cond1) {
+; if (cond2) {
+; sideeffect(0);
+; } else {
+; sideeffect(1);
+; }
+; sink();
+; } else {
+; sideeffect(2);
+; }
+; sink();
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK: define {{.*}}@_Z3fooii{{.*}}#[[outlined_func_attr:[0-9]+]]
+; CHECK-NOT: _Z3fooii.cold
+; CHECK: attributes #[[outlined_func_attr]] = { {{.*}}minsize
+define void @_Z3fooii(i32, i32) {
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ store i32 %0, i32* %3, align 4
+ store i32 %1, i32* %4, align 4
+ %5 = load i32, i32* %3, align 4
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %7, label %13
+
+; <label>:7: ; preds = %2
+ %8 = load i32, i32* %4, align 4
+ %9 = icmp ne i32 %8, 0
+ br i1 %9, label %10, label %11
+
+; <label>:10: ; preds = %7
+ call void @_Z10sideeffecti(i32 0)
+ br label %12
+
+; <label>:11: ; preds = %7
+ call void @_Z10sideeffecti(i32 1)
+ br label %12
+
+; <label>:12: ; preds = %11, %10
+ call void @_Z4sinkv() #3
+ br label %14
+
+; <label>:13: ; preds = %2
+ call void @_Z10sideeffecti(i32 2)
+ br label %14
+
+; <label>:14: ; preds = %13, %12
+ call void @_Z4sinkv() #3
+ ret void
+}
+
+declare void @_Z10sideeffecti(i32)
+
+declare void @_Z4sinkv() cold
Added: llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,57 @@
+; RUN: opt -S -hotcoldsplit < %s 2>&1 | FileCheck %s
+
+; CHECK-LABEL: define {{.*}}@fun
+; CHECK: call {{.*}}@fun.cold.2(
+; CHECK-NEXT: ret void
+; CHECK: call {{.*}}@fun.cold.1(
+; CHECK-NEXT: ret void
+define void @fun() {
+entry:
+ br i1 undef, label %A.then, label %A.else
+
+A.else:
+ br label %A.then4
+
+A.then4:
+ br i1 undef, label %A.then5, label %A.end
+
+A.then5:
+ br label %A.cleanup
+
+A.end:
+ br label %A.cleanup
+
+A.cleanup:
+ %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
+ unreachable
+
+A.then:
+ br i1 undef, label %B.then, label %B.else
+
+B.then:
+ ret void
+
+B.else:
+ br label %B.then4
+
+B.then4:
+ br i1 undef, label %B.then5, label %B.end
+
+B.then5:
+ br label %B.cleanup
+
+B.end:
+ br label %B.cleanup
+
+B.cleanup:
+ %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
+ unreachable
+}
+
+; CHECK-LABEL: define {{.*}}@fun.cold.1(
+; CHECK: %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
+; CHECK-NEXT: unreachable
+
+; CHECK-LABEL: define {{.*}}@fun.cold.2(
+; CHECK: %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
+; CHECK-NEXT: unreachable
Added: llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/outline-multiple-entry-region.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,81 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+; Source:
+;
+; extern __attribute__((cold)) void sink();
+; extern void sideeffect(int);
+; void foo(int cond1, int cond2) {
+; while (true) {
+; if (cond1) {
+; sideeffect(0); // This is cold (it reaches sink()).
+; break;
+; }
+; if (cond2) {
+; sideeffect(1); // This is cold (it reaches sink()).
+; break;
+; }
+; sideeffect(2);
+; return;
+; }
+; sink();
+; sideeffect(3);
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.1
+; CHECK: call void @_Z10sideeffecti(i32 1)
+; CHECK: call void @_Z10sideeffecti(i32 11)
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.2
+; CHECK: call void @_Z10sideeffecti(i32 0)
+; CHECK: call void @_Z10sideeffecti(i32 10)
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.3
+; CHECK: call void @_Z4sinkv
+; CHECK: call void @_Z10sideeffecti(i32 3)
+
+define void @_Z3fooii(i32, i32) {
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ store i32 %0, i32* %3, align 4
+ store i32 %1, i32* %4, align 4
+ br label %5
+
+; <label>:5: ; preds = %2
+ %6 = load i32, i32* %3, align 4
+ %7 = icmp ne i32 %6, 0
+ br i1 %7, label %8, label %9
+
+; <label>:8: ; preds = %5
+ call void @_Z10sideeffecti(i32 0)
+ call void @_Z10sideeffecti(i32 10)
+ br label %14
+
+; <label>:9: ; preds = %5
+ %10 = load i32, i32* %4, align 4
+ %11 = icmp ne i32 %10, 0
+ br i1 %11, label %12, label %13
+
+; <label>:12: ; preds = %9
+ call void @_Z10sideeffecti(i32 1)
+ call void @_Z10sideeffecti(i32 11)
+ br label %14
+
+; <label>:13: ; preds = %9
+ call void @_Z10sideeffecti(i32 2)
+ br label %15
+
+; <label>:14: ; preds = %12, %8
+ call void @_Z4sinkv() #3
+ call void @_Z10sideeffecti(i32 3)
+ br label %15
+
+; <label>:15: ; preds = %14, %13
+ ret void
+}
+
+declare void @_Z10sideeffecti(i32)
+
+declare void @_Z4sinkv() cold
Modified: llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll?rev=348639&r1=348638&r2=348639&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll (original)
+++ llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll Fri Dec 7 12:23:52 2018
@@ -55,10 +55,59 @@ if.end:
ret void
}
+; This is the same as @foo, but the while loop comes after the sink block.
+; CHECK-LABEL: define {{.*}}@while_loop_after_sink(
+; CHECK: br i1 {{.*}}, label %if.end, label %codeRepl
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT: call void @while_loop_after_sink.cold.1
+; CHECK-LABEL: if.end:
+; CHECK: call void @sideeffect(i32 1)
+define void @while_loop_after_sink(i32 %cond) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.end, label %sink
+
+sink:
+ tail call void (...) @sink()
+ br label %while.cond.preheader
+
+while.cond.preheader:
+ %cmp3 = icmp sgt i32 %cond, 10
+ br i1 %cmp3, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %while.cond.preheader
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %cond.addr.04 = phi i32 [ %dec, %while.body ], [ %cond, %while.body.preheader ]
+ %dec = add nsw i32 %cond.addr.04, -1
+ tail call void @sideeffect(i32 0) #3
+ %cmp = icmp sgt i32 %dec, 10
+ br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %while.cond.preheader
+ ret void
+
+if.end: ; preds = %entry
+ tail call void @sideeffect(i32 1)
+ ret void
+}
+
; CHECK-LABEL: define {{.*}}@foo.cold.1
; CHECK: phi i32
; CHECK-NEXT: add nsw i32
; CHECK-NEXT: call {{.*}}@sideeffect
+; CHECK-NEXT: icmp
+; CHECK-NEXT: br
+
+; CHECK-LABEL: define {{.*}}@while_loop_after_sink.cold.1
+; CHECK: call {{.*}}@sink
+; CHECK: phi i32
+; CHECK-NEXT: add nsw i32
+; CHECK-NEXT: call {{.*}}@sideeffect
; CHECK-NEXT: icmp
; CHECK-NEXT: br
Added: llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/phi-with-distinct-outlined-values.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,35 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1(
+; CHECK: call {{.*}}@sink
+; CHECK: %p.ce = phi i32 [ 1, %coldbb ], [ 3, %coldbb2 ]
+; CHECK-NEXT: store i32 %p.ce, i32* %p.ce.out
+
+define void @foo(i32 %cond) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+ call void @sink()
+ call void @sideeffect()
+ call void @sideeffect()
+ br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+ br label %if.end
+
+if.end:
+ %p = phi i32 [0, %entry], [1, %coldbb], [3, %coldbb2]
+ ret void
+}
+
+declare void @sink() cold
+
+declare void @sideeffect()
Added: llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/region-overlap.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,65 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+; Source:
+;
+; __attribute__((cold)) extern void sink(int);
+; extern void sideeffect(int);
+; void foo(int cond1, int cond2) {
+; if (cond1) {
+; if (cond2) { // This is the first cold region we visit.
+; sideeffect(0);
+; sideeffect(10);
+; sink(0);
+; }
+;
+; // There's a larger, overlapping cold region here. But we ignore it.
+; // This could be improved.
+; sideeffect(1);
+; sideeffect(11);
+; sink(1);
+; }
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii
+; CHECK: call {{.*}}@_Z3fooii.cold.1
+; CHECK-NOT: _Z3fooii.cold
+define void @_Z3fooii(i32, i32) {
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ store i32 %0, i32* %3, align 4
+ store i32 %1, i32* %4, align 4
+ %5 = load i32, i32* %3, align 4
+ %6 = icmp ne i32 %5, 0
+ br i1 %6, label %7, label %12
+
+; <label>:7: ; preds = %2
+ %8 = load i32, i32* %4, align 4
+ %9 = icmp ne i32 %8, 0
+ br i1 %9, label %10, label %11
+
+; <label>:10: ; preds = %7
+ call void @_Z10sideeffecti(i32 0)
+ call void @_Z10sideeffecti(i32 10)
+ call void @_Z4sinki(i32 0) #3
+ br label %11
+
+; <label>:11: ; preds = %10, %7
+ call void @_Z10sideeffecti(i32 1)
+ call void @_Z10sideeffecti(i32 11)
+ call void @_Z4sinki(i32 1) #3
+ br label %12
+
+; <label>:12: ; preds = %11, %2
+ ret void
+}
+
+; CHECK-LABEL: define {{.*}}@_Z3fooii.cold.1
+; CHECK: call void @_Z10sideeffecti(i32 0)
+; CHECK: call void @_Z10sideeffecti(i32 10)
+
+declare void @_Z10sideeffecti(i32)
+
+declare void @_Z4sinki(i32) cold
Added: llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll?rev=348639&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/succ-block-with-self-edge.ll Fri Dec 7 12:23:52 2018
@@ -0,0 +1,56 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_same_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_same_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+define void @exit_block_with_same_incoming_vals(i32 %cond) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+ call void @sink()
+ call void @sideeffect()
+ call void @sideeffect()
+ br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+ %p2 = phi i32 [0, %coldbb], [1, %coldbb2]
+ br i1 undef, label %if.end, label %coldbb2
+
+if.end:
+ %p = phi i32 [0, %entry], [1, %coldbb], [1, %coldbb2]
+ ret void
+}
+
+; CHECK-LABEL: define {{.*}}@exit_block_with_distinct_incoming_vals
+; CHECK: call {{.*}}@exit_block_with_distinct_incoming_vals.cold.1(
+; CHECK-NOT: br i1 undef
+; CHECK: phi i32 [ 0, %entry ], [ %p.ce.reload, %codeRepl ]
+define void @exit_block_with_distinct_incoming_vals(i32 %cond) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.end, label %coldbb
+
+coldbb:
+ call void @sink()
+ call void @sideeffect()
+ call void @sideeffect()
+ br i1 undef, label %if.end, label %coldbb2
+
+coldbb2:
+ %p2 = phi i32 [0, %coldbb], [1, %coldbb2]
+ br i1 undef, label %if.end, label %coldbb2
+
+if.end:
+ %p = phi i32 [0, %entry], [1, %coldbb], [2, %coldbb2]
+ ret void
+}
+
+declare void @sink() cold
+
+declare void @sideeffect()
More information about the llvm-commits mailing list