[llvm] [HCS] Extend to outline overlapping sub/super cold regions (PR #80732)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 16 17:40:17 PST 2024
https://github.com/hiraditya updated https://github.com/llvm/llvm-project/pull/80732
>From 8126edf5b6c12b5cf6ffd3371187970f9aaa35b1 Mon Sep 17 00:00:00 2001
From: Shimin Cui <scui at ca.ibm.com>
Date: Mon, 5 Feb 2024 12:08:29 -0500
Subject: [PATCH] [HCS] Extend to outline overlapping sub/super cold regions
---
.../llvm/Transforms/IPO/HotColdSplitting.h | 15 +-
llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 151 ++++++++++--------
.../assumption-cache-invalidation.ll | 8 +-
llvm/test/Transforms/HotColdSplit/eh-pads.ll | 7 +-
.../HotColdSplit/outline-disjoint-diamonds.ll | 9 +-
.../HotColdSplit/outline-inner-region.ll | 49 ++++++
.../HotColdSplit/outline-outer-region.ll | 52 ++++++
7 files changed, 207 insertions(+), 84 deletions(-)
create mode 100644 llvm/test/Transforms/HotColdSplit/outline-inner-region.ll
create mode 100644 llvm/test/Transforms/HotColdSplit/outline-outer-region.ll
diff --git a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
index c87c6453500c57..02196ab1df6996 100644
--- a/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/llvm/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -24,6 +24,7 @@ class TargetTransformInfo;
class OptimizationRemarkEmitter;
class AssumptionCache;
class DominatorTree;
+class CodeExtractor;
class CodeExtractorAnalysisCache;
/// A sequence of basic blocks.
@@ -43,19 +44,17 @@ class HotColdSplitting {
private:
bool isFunctionCold(const Function &F) const;
- bool isBasicBlockCold(BasicBlock* BB,
- BranchProbability ColdProbThresh,
- SmallPtrSetImpl<BasicBlock *> &ColdBlocks,
+ bool isBasicBlockCold(BasicBlock *BB, BranchProbability ColdProbThresh,
SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
BlockFrequencyInfo *BFI) const;
bool shouldOutlineFrom(const Function &F) const;
bool outlineColdRegions(Function &F, bool HasProfileSummary);
- Function *extractColdRegion(const BlockSequence &Region,
+ bool isSplittingBeneficial(CodeExtractor &CE, const BlockSequence &Region,
+ TargetTransformInfo &TTI);
+ Function *extractColdRegion(BasicBlock *BB, CodeExtractor &CE,
const CodeExtractorAnalysisCache &CEAC,
- DominatorTree &DT, BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE,
- AssumptionCache *AC, unsigned Count);
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE);
ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
function_ref<TargetTransformInfo &(Function &)> GetTTI;
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index fabb3c5fb921df..3e06349fc87b4b 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -215,15 +215,10 @@ bool HotColdSplitting::isFunctionCold(const Function &F) const {
return false;
}
-bool HotColdSplitting::isBasicBlockCold(BasicBlock *BB,
- BranchProbability ColdProbThresh,
- SmallPtrSetImpl<BasicBlock *> &ColdBlocks,
- SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
- BlockFrequencyInfo *BFI) const {
- // This block is already part of some outlining region.
- if (ColdBlocks.count(BB))
- return true;
-
+bool HotColdSplitting::isBasicBlockCold(
+ BasicBlock *BB, BranchProbability ColdProbThresh,
+ SmallPtrSetImpl<BasicBlock *> &AnnotatedColdBlocks,
+ BlockFrequencyInfo *BFI) const {
if (BFI) {
if (PSI->isColdBlock(BB, BFI))
return true;
@@ -372,18 +367,11 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
return Penalty;
}
-Function *HotColdSplitting::extractColdRegion(
- const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC,
- DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) {
+bool HotColdSplitting::isSplittingBeneficial(CodeExtractor &CE,
+ const BlockSequence &Region,
+ TargetTransformInfo &TTI) {
assert(!Region.empty());
- // TODO: Pass BFI and BPI to update profile information.
- CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
- /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
- /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
- /* Suffix */ "cold." + std::to_string(Count));
-
// Perform a simple cost/benefit analysis to decide whether or not to permit
// splitting.
SetVector<Value *> Inputs, Outputs, Sinks;
@@ -394,9 +382,16 @@ Function *HotColdSplitting::extractColdRegion(
LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
<< ", penalty = " << OutliningPenalty << "\n");
if (!OutliningBenefit.isValid() || OutliningBenefit <= OutliningPenalty)
- return nullptr;
+ return false;
+
+ return true;
+}
- Function *OrigF = Region[0]->getParent();
+Function *HotColdSplitting::extractColdRegion(
+ BasicBlock *BB, CodeExtractor &CE, const CodeExtractorAnalysisCache &CEAC,
+ BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
+ OptimizationRemarkEmitter &ORE) {
+ Function *OrigF = BB->getParent();
if (Function *OutF = CE.extractCodeRegion(CEAC)) {
User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);
@@ -418,8 +413,7 @@ Function *HotColdSplitting::extractColdRegion(
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "HotColdSplit",
- &*Region[0]->begin())
+ return OptimizationRemark(DEBUG_TYPE, "HotColdSplit", &*BB->begin())
<< ore::NV("Original", OrigF) << " split cold code into "
<< ore::NV("Split", OutF);
});
@@ -427,10 +421,8 @@ Function *HotColdSplitting::extractColdRegion(
}
ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
- &*Region[0]->begin())
- << "Failed to extract region at block "
- << ore::NV("Block", Region.front());
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", &*BB->begin())
+ << "Failed to extract region at block " << ore::NV("Block", BB);
});
return nullptr;
}
@@ -620,16 +612,17 @@ class OutliningRegion {
} // namespace
bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
- bool Changed = false;
-
- // The set of cold blocks.
+ // The set of cold blocks outlined.
SmallPtrSet<BasicBlock *, 4> ColdBlocks;
+ // The set of cold blocks that cannot be outlined.
+ SmallPtrSet<BasicBlock *, 4> CannotBeOutlinedColdBlocks;
+
// Set of cold blocks obtained with RPOT.
SmallPtrSet<BasicBlock *, 4> AnnotatedColdBlocks;
// The worklist of non-intersecting regions left to outline.
- SmallVector<OutliningRegion, 2> OutliningWorklist;
+ SmallVector<std::pair<BasicBlock *, CodeExtractor>, 2> OutliningWorklist;
// Set up an RPO traversal. Experimentally, this performs better (outlines
// more) than a PO traversal, because we prevent region overlap by keeping
@@ -655,10 +648,18 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
if (ColdBranchProbDenom.getNumOccurrences())
ColdProbThresh = BranchProbability(1, ColdBranchProbDenom.getValue());
+ unsigned OutlinedFunctionID = 1;
// Find all cold regions.
for (BasicBlock *BB : RPOT) {
- if (!isBasicBlockCold(BB, ColdProbThresh, ColdBlocks, AnnotatedColdBlocks,
- BFI))
+ // This block is already part of some outlining region.
+ if (ColdBlocks.count(BB))
+ continue;
+
+ // This block is already part of some region that cannot be outlined.
+ if (CannotBeOutlinedColdBlocks.count(BB))
+ continue;
+
+ if (!isBasicBlockCold(BB, ColdProbThresh, AnnotatedColdBlocks, BFI))
continue;
LLVM_DEBUG({
@@ -681,50 +682,68 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
return markFunctionCold(F);
}
- // If this outlining region intersects with another, drop the new region.
- //
- // TODO: It's theoretically possible to outline more by only keeping the
- // largest region which contains a block, but the extra bookkeeping to do
- // this is tricky/expensive.
- bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
- return !ColdBlocks.insert(Block.first).second;
- });
- if (RegionsOverlap)
- continue;
+ do {
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
+ LLVM_DEBUG({
+ dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
+ for (BasicBlock *BB : SubRegion)
+ BB->dump();
+ });
+
+ // TODO: Pass BFI and BPI to update profile information.
+ CodeExtractor CE(
+ SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr,
+ /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
+ /* AllowAlloca */ false, /* AllocaBlock */ nullptr,
+ /* Suffix */ "cold." + std::to_string(OutlinedFunctionID));
+
+ if (CE.isEligible() && isSplittingBeneficial(CE, SubRegion, TTI) &&
+ // If this outlining region intersects with another, drop the new
+ // region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping
+ // the largest region which contains a block, but the extra
+ // bookkeeping to do this is tricky/expensive.
+ none_of(SubRegion, [&](BasicBlock *Block) {
+ return ColdBlocks.contains(Block);
+ })) {
+ ColdBlocks.insert(SubRegion.begin(), SubRegion.end());
+
+ for (auto *Block : SubRegion) {
+ LLVM_DEBUG(dbgs()
+ << " contains cold block:" << Block->getName() << "\n");
+ }
+
+ OutliningWorklist.emplace_back(
+ std::make_pair(SubRegion[0], std::move(CE)));
+ ++OutlinedFunctionID;
+ } else {
+ // The cold block region cannot be outlined.
+ for (auto *Block : SubRegion)
+ if ((DT->dominates(BB, Block) && PDT->dominates(Block, BB)) ||
+ (PDT->dominates(BB, Block) && DT->dominates(Block, BB)))
+ // Skip this cold block in the loop to save compile time.
+ CannotBeOutlinedColdBlocks.insert(Block);
+ }
+ } while (!Region.empty());
- OutliningWorklist.emplace_back(std::move(Region));
++NumColdRegionsFound;
}
}
if (OutliningWorklist.empty())
- return Changed;
+ return false;
// Outline single-entry cold regions, splitting up larger regions as needed.
- unsigned OutlinedFunctionID = 1;
// Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
CodeExtractorAnalysisCache CEAC(F);
- do {
- OutliningRegion Region = OutliningWorklist.pop_back_val();
- assert(!Region.empty() && "Empty outlining region in worklist");
- do {
- BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
- LLVM_DEBUG({
- dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
- for (BasicBlock *BB : SubRegion)
- BB->dump();
- });
-
- Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI,
- ORE, AC, OutlinedFunctionID);
- if (Outlined) {
- ++OutlinedFunctionID;
- Changed = true;
- }
- } while (!Region.empty());
- } while (!OutliningWorklist.empty());
+ for (auto &BCE : OutliningWorklist) {
+ Function *Outlined =
+ extractColdRegion(BCE.first, BCE.second, CEAC, BFI, TTI, ORE);
+ assert(Outlined && "Should be outlined");
+ }
- return Changed;
+ return true;
}
bool HotColdSplitting::run(Module &M) {
diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
index 2154fb5cb5bc1e..8bc71148352d2c 100644
--- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
+++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
@@ -13,13 +13,13 @@ target triple = "aarch64"
; CHECK-NOT: @llvm.assume
; CHECK: }
; CHECK: declare {{.*}}@llvm.assume
-; CHECK: define {{.*}}@f.cold.1()
-; CHECK-LABEL: newFuncRoot:
-; CHECK: }
-; CHECK: define {{.*}}@f.cold.2(i64 %load1)
+; CHECK: define {{.*}}@f.cold.1(i64 %load1)
; CHECK-LABEL: newFuncRoot:
; CHECK: %cmp1 = icmp eq i64 %load1, 0
; CHECK-NOT: call void @llvm.assume
+; CHECK: define {{.*}}@f.cold.2()
+; CHECK-LABEL: newFuncRoot:
+; CHECK: }
define void @f() {
entry:
diff --git a/llvm/test/Transforms/HotColdSplit/eh-pads.ll b/llvm/test/Transforms/HotColdSplit/eh-pads.ll
index 415c7e4b2bde3b..ad7baf97f68d0c 100644
--- a/llvm/test/Transforms/HotColdSplit/eh-pads.ll
+++ b/llvm/test/Transforms/HotColdSplit/eh-pads.ll
@@ -84,13 +84,16 @@ cold4:
; CHECK: sink
; CHECK-LABEL: define {{.*}}@bar.cold.1(
+; CHECK: sideeffect(i32 0)
+
+; CHECK-LABEL: define {{.*}}@bar.cold.2(
; CHECK: sideeffect(i32 1)
; CHECK-LABEL: define {{.*}}@baz.cold.1(
-; CHECK: sideeffect(i32 1)
+; CHECK: sideeffect(i32 0)
; CHECK-LABEL: define {{.*}}@baz.cold.2(
-; CHECK: sideeffect(i32 0)
+; CHECK: sideeffect(i32 1)
declare void @sideeffect(i32)
diff --git a/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll b/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
index 65f8aad4240667..0c055981260b2b 100644
--- a/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
+++ b/llvm/test/Transforms/HotColdSplit/outline-disjoint-diamonds.ll
@@ -1,10 +1,10 @@
; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
; CHECK-LABEL: define {{.*}}@fun
-; CHECK: call {{.*}}@fun.cold.2(
-; CHECK-NEXT: ret void
; CHECK: call {{.*}}@fun.cold.1(
; CHECK-NEXT: ret void
+; CHECK: call {{.*}}@fun.cold.2(
+; CHECK-NEXT: ret void
define void @fun() {
entry:
br i1 undef, label %A.then, label %A.else
@@ -49,9 +49,10 @@ B.cleanup:
}
; CHECK-LABEL: define {{.*}}@fun.cold.1(
-; CHECK: %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
+; CHECK: %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
; CHECK-NEXT: unreachable
; CHECK-LABEL: define {{.*}}@fun.cold.2(
-; CHECK: %A.cleanup.dest.slot.0 = phi i32 [ 1, %A.then5 ], [ 0, %A.end ]
+; CHECK: %B.cleanup.dest.slot.0 = phi i32 [ 1, %B.then5 ], [ 0, %B.end ]
; CHECK-NEXT: unreachable
+
diff --git a/llvm/test/Transforms/HotColdSplit/outline-inner-region.ll b/llvm/test/Transforms/HotColdSplit/outline-inner-region.ll
new file mode 100644
index 00000000000000..73398bf365ff04
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/outline-inner-region.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-max-params=1 < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc64-ibm-aix7.2.0.0"
+
+define void @foo(i32 %cond) {
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: if.then:
+; CHECK: br i1 {{.*}}, label %if.then1, label %codeRepl
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT: call void @foo.cold.1
+;
+entry:
+ %cond.addr = alloca i32
+ store i32 %cond, ptr %cond.addr
+ %0 = load i32, ptr %cond.addr
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end2
+
+if.then: ; preds = %entry
+ %1 = load i32, ptr %cond.addr
+ call void @sink(i32 %0)
+ %cmp = icmp sgt i32 %1, 10
+ br i1 %cmp, label %if.then1, label %if.else
+
+if.then1: ; preds = %if.then
+ call void @sideeffect(i32 2)
+ br label %if.end
+
+if.else: ; preds = %if.then
+ call void @sink(i32 0)
+ call void @sideeffect(i32 0)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then1
+ br label %if.end2
+
+if.end2: ; preds = %entry
+ call void @sideeffect(i32 1)
+ ret void
+}
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1
+; CHECK: call {{.*}}@sink
+; CHECK-NEXT: call {{.*}}@sideeffect
+
+declare void @sideeffect(i32)
+
+declare void @sink(i32) cold
diff --git a/llvm/test/Transforms/HotColdSplit/outline-outer-region.ll b/llvm/test/Transforms/HotColdSplit/outline-outer-region.ll
new file mode 100644
index 00000000000000..4a3c96982a87b8
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/outline-outer-region.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -passes=hotcoldsplit -hotcoldsplit-threshold=2 < %s | FileCheck %s
+
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc64-ibm-aix7.2.0.0"
+
+define void @foo(i32 %cond, i32 %s0, i32 %s1) {
+; CHECK-LABEL: define {{.*}}@foo(
+; CHECK: br i1 {{.*}}, label %codeRepl, label %if.end2
+; CHECK-LABEL: codeRepl:
+; CHECK-NEXT: call void @foo.cold.1
+; CHECK-LABEL: if.end2:
+; CHECK: call void @sideeffect
+;
+entry:
+ %cond.addr = alloca i32
+ store i32 %cond, ptr %cond.addr
+ %0 = load i32, ptr %cond.addr
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end2
+
+if.then: ; preds = %entry
+ %1 = load i32, ptr %cond.addr
+ %cmp = icmp sgt i32 %1, 10
+ br i1 %cmp, label %if.then1, label %if.else
+
+if.then1: ; preds = %if.then
+ call void @sideeffect(i32 0)
+ br label %if.end
+
+if.else: ; preds = %if.then
+ call void @sink(i32 %s0)
+ call void @sideeffect(i32 1)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then1
+ call void @sink(i32 %0)
+ ret void
+
+if.end2: ; preds = %entry
+ call void @sideeffect(i32 %s1)
+ ret void
+}
+
+; CHECK-LABEL: define {{.*}}@foo.cold.1
+; CHECK: call {{.*}}@sink
+; CHECK: call {{.*}}@sideeffect
+; CHECK: call {{.*}}@sideeffect
+; CHECK: call {{.*}}@sink
+
+declare void @sideeffect(i32)
+
+declare void @sink(i32) cold
More information about the llvm-commits
mailing list