[llvm-branch-commits] [llvm] 1ab4db0 - [HotColdSplit] Reflect full cost of parameters in split penalty
Aditya Kumar via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 18 17:11:08 PST 2020
Author: Aditya Kumar
Date: 2020-12-18T17:06:17-08:00
New Revision: 1ab4db0f847fa1ddd394dbf54a5051b626eab160
URL: https://github.com/llvm/llvm-project/commit/1ab4db0f847fa1ddd394dbf54a5051b626eab160
DIFF: https://github.com/llvm/llvm-project/commit/1ab4db0f847fa1ddd394dbf54a5051b626eab160.diff
LOG: [HotColdSplit] Reflect full cost of parameters in split penalty
Make the penalty for splitting a region more accurately reflect the cost
of materializing all of the inputs/outputs to/from the region.
This almost entirely eliminates code growth within functions which
undergo splitting in key internal frameworks, and reduces the size of
those frameworks by 2.6% to 3%.
rdar://49167240
Patch by: Vedant Kumar (@vsk)
Reviewers: hiraditya,rjf,t.p.northover
Reviewed By: hiraditya,rjf
Differential Revision: https://reviews.llvm.org/D59715
Added:
Modified:
llvm/lib/Transforms/IPO/HotColdSplitting.cpp
llvm/test/Transforms/CodeExtractor/extract-assume.ll
llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index b25b789d1dae..aa708ee520b1 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -67,6 +67,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <limits>
#include <cassert>
#include <string>
@@ -96,6 +97,10 @@ static cl::opt<std::string>
cl::desc("Name for the section containing cold functions "
"extracted by hot-cold splitting."));
+static cl::opt<int> MaxParametersForSplit(
+ "hotcoldsplit-max-params", cl::init(4), cl::Hidden,
+ cl::desc("Maximum number of parameters for a split function"));
+
namespace {
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
@@ -257,18 +262,6 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
if (SplittingThreshold <= 0)
return Penalty;
- // The typical code size cost for materializing an argument for the outlined
- // call.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
- const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
- Penalty += CostForArgMaterialization * NumInputs;
-
- // The typical code size cost for an output alloca, its associated store, and
- // its associated reload.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
- const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
- Penalty += CostForRegionOutput * NumOutputs;
-
// Find the number of distinct exit blocks for the region. Use a conservative
// check to determine whether control returns from the region.
bool NoBlocksReturn = true;
@@ -289,6 +282,48 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
}
}
+ // Count the number of phis in exit blocks with >= 2 incoming values from the
+ // outlining region. These phis are split (\ref severSplitPHINodesOfExits),
+ // and new outputs are created to supply the split phis. CodeExtractor can't
+ // report these new outputs until extraction begins, but it's important to
+ // factor the cost of the outputs into the cost calculation.
+ unsigned NumSplitExitPhis = 0;
+ for (BasicBlock *ExitBB : SuccsOutsideRegion) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ int NumIncomingVals = 0;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
+ ++NumIncomingVals;
+ if (NumIncomingVals > 1) {
+ ++NumSplitExitPhis;
+ break;
+ }
+ }
+ }
+ }
+
+ // Apply a penalty for calling the split function. Factor in the cost of
+ // materializing all of the parameters.
+ int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
+ int NumParams = NumInputs + NumOutputsAndSplitPhis;
+ if (NumParams > MaxParametersForSplit) {
+ LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis
+ << " outputs exceeds parameter limit ("
+ << MaxParametersForSplit << ")\n");
+ return std::numeric_limits<int>::max();
+ }
+ const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n");
+ Penalty += CostForArgMaterialization * NumParams;
+
+ // Apply the typical code size cost for an output alloca and its associated
+ // reload in the caller. Also penalize the associated store in the callee.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputsAndSplitPhis
+ << " outputs/split phis\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
+
// Apply a `noreturn` bonus.
if (NoBlocksReturn) {
LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
@@ -298,7 +333,7 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
// Apply a penalty for having more than one successor outside of the region.
// This penalty accounts for the switch needed in the caller.
- if (!SuccsOutsideRegion.empty()) {
+ if (SuccsOutsideRegion.size() > 1) {
LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
<< " non-region successors\n");
Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
diff --git a/llvm/test/Transforms/CodeExtractor/extract-assume.ll b/llvm/test/Transforms/CodeExtractor/extract-assume.ll
index bf0d2ecb2d6b..ffba771856fb 100644
--- a/llvm/test/Transforms/CodeExtractor/extract-assume.ll
+++ b/llvm/test/Transforms/CodeExtractor/extract-assume.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes="function(slp-vectorizer),module(hotcoldsplit),function(slp-vectorizer,print<assumptions>)" -disable-output %s 2>&1 | FileCheck %s
+; RUN: opt -passes="function(slp-vectorizer),module(hotcoldsplit),function(slp-vectorizer,print<assumptions>)" -hotcoldsplit-threshold=-1 -disable-output %s 2>&1 | FileCheck %s
;
; Make sure this compiles. Check that function assumption cache is refreshed
; after extracting blocks with assume calls from the function.
diff --git a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
index fffd6f9f5dcf..4906316816e9 100644
--- a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
+++ b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-inputs.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -hotcoldsplit-max-params=2 -S < %s -o /dev/null 2>&1 | FileCheck %s
declare void @sink(i32*, i32, i32) cold
@@ -10,10 +10,27 @@ define void @foo(i32 %arg) {
br i1 undef, label %cold, label %exit
cold:
- ; CHECK: Applying penalty for: 2 inputs
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 2 params
+ ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis
+ ; CHECK-NEXT: penalty = 6
call void @sink(i32* @g, i32 %arg, i32 %local)
ret void
exit:
ret void
}
+
+define void @bar(i32* %p1, i32 %p2, i32 %p3) {
+ br i1 undef, label %cold, label %exit
+
+cold:
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: 3 inputs and 0 outputs exceeds parameter limit (2)
+ ; CHECK-NEXT: penalty = 2147483647
+ call void @sink(i32* %p1, i32 %p2, i32 %p3)
+ ret void
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
index a7d9f97ab030..b7bf760b90c4 100644
--- a/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
+++ b/llvm/test/Transforms/HotColdSplit/apply-penalty-for-outputs.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -S < %s -o /dev/null 2>&1 | FileCheck %s
declare void @sink() cold
@@ -10,8 +10,10 @@ entry:
br i1 undef, label %cold, label %exit
cold:
- ; CHECK: Applying penalty for: 1 output
- ; CHECK: Applying penalty for: 1 non-region successors
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 1 params
+ ; CHECK-NEXT: Applying penalty for: 1 outputs/split phis
+ ; CHECK-NEXT: penalty = 7
%local = load i32, i32* @g
call void @sink()
br label %exit
diff --git a/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll b/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
index 3886d76da016..a9e3fc62e37c 100644
--- a/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
+++ b/llvm/test/Transforms/HotColdSplit/apply-successor-penalty.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -S < %s -o /dev/null 2>&1 | FileCheck %s
declare void @sink() cold
@@ -9,7 +9,10 @@ entry:
br i1 undef, label %cold1, label %exit
cold1:
- ; CHECK: Applying penalty for: 1 non-region successor
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 0 params
+ ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis
+ ; CHECK-NEXT: penalty = 2
call void @sink()
br i1 undef, label %cold2, label %cold3
@@ -32,7 +35,11 @@ entry:
br i1 undef, label %cold1, label %exit1
cold1:
- ; CHECK: Applying penalty for: 2 non-region successors
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 0 params
+ ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis
+ ; CHECK-NEXT: Applying penalty for: 2 non-region successors
+ ; CHECK-NEXT: penalty = 3
call void @sink()
br i1 undef, label %cold2, label %cold3
diff --git a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
index bdb46d584dcb..465d0e6add77 100644
--- a/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
+++ b/llvm/test/Transforms/HotColdSplit/assumption-cache-invalidation.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt -S -instsimplify -hotcoldsplit -debug < %s 2>&1 | FileCheck %s
+; RUN: opt -S -instsimplify -hotcoldsplit -hotcoldsplit-threshold=-1 -debug < %s 2>&1 | FileCheck %s
; RUN: opt -instcombine -hotcoldsplit -instsimplify %s -o /dev/null
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -13,7 +13,10 @@ target triple = "aarch64"
; CHECK-NOT: @llvm.assume
; CHECK: }
; CHECK: declare {{.*}}@llvm.assume
-; CHECK: define {{.*}}@f.cold.1(i64 %0)
+; CHECK: define {{.*}}@f.cold.1()
+; CHECK-LABEL: newFuncRoot:
+; CHECK: }
+; CHECK: define {{.*}}@f.cold.2(i64 %0)
; CHECK-LABEL: newFuncRoot:
; CHECK: %1 = icmp eq i64 %0, 0
; CHECK-NOT: call void @llvm.assume
More information about the llvm-branch-commits
mailing list