[llvm-branch-commits] [llvm] 8b51e5e - [NewPM][Inliner] Make inlined calls to functions in same SCC as callee exponentially expensive
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 8 19:23:25 PST 2022
Author: Arthur Eubanks
Date: 2022-03-08T17:12:22-08:00
New Revision: 8b51e5ee0a2e6a38886e7ededd3267443cb99f1e
URL: https://github.com/llvm/llvm-project/commit/8b51e5ee0a2e6a38886e7ededd3267443cb99f1e
DIFF: https://github.com/llvm/llvm-project/commit/8b51e5ee0a2e6a38886e7ededd3267443cb99f1e.diff
LOG: [NewPM][Inliner] Make inlined calls to functions in same SCC as callee exponentially expensive
Introduce a new attribute "function-inline-cost-multiplier" which
multiplies the inline cost of a call site (or all calls to a callee) by
the multiplier.
When processing the list of calls created by inlining, check each call
to see if the new call's callee is in the same SCC as the original
callee. If so, set the "function-inline-cost-multiplier" attribute of
the new call site to double the original call site's attribute value.
This does not happen when the original call site is intra-SCC.
This is an alternative to D120584, which marks the call sites as
noinline.
Hopefully fixes PR45253.
Reviewed By: davidxl
Differential Revision: https://reviews.llvm.org/D121084
(cherry picked from commit 53e5e586709a329370ea268a8e8191b16fd641b7)
Added:
llvm/test/Transforms/Inline/mut-rec-scc-2.ll
llvm/test/Transforms/Inline/mut-rec-scc.ll
Modified:
llvm/include/llvm/Analysis/InlineCost.h
llvm/lib/Analysis/InlineCost.cpp
llvm/lib/Transforms/IPO/Inliner.cpp
llvm/test/Transforms/Inline/inline-cost-attributes.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index f86ee5a148749..d3fa3b879125f 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -52,6 +52,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
/// Do not inline dynamic allocas that have been constant propagated to be
/// static allocas above this amount in bytes.
const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
+
+const char FunctionInlineCostMultiplierAttributeName[] =
+ "function-inline-cost-multiplier";
} // namespace InlineConstants
// The cost-benefit pair computed by cost-benefit analysis.
@@ -217,6 +220,8 @@ struct InlineParams {
Optional<bool> AllowRecursiveCall = false;
};
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
+
/// Generate the parameters to tune the inline cost analysis based only on the
/// commandline options.
InlineParams getInlineParams();
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d5411d916c777..cd5314e7a17a1 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -133,8 +133,6 @@ static cl::opt<bool> DisableGEPConstOperand(
cl::desc("Disables evaluation of GetElementPtr with constant operands"));
namespace {
-class InlineCostCallAnalyzer;
-
/// This function behaves more like CallBase::hasFnAttr: when it looks for the
/// requested attribute, it checks both the call instruction and the called
/// function (if it's available and operand bundles don't prohibit that).
@@ -151,7 +149,9 @@ Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
return {};
}
+} // namespace
+namespace llvm {
Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
Attribute Attr = getFnAttr(CB, AttrKind);
int AttrValue;
@@ -159,6 +159,10 @@ Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
return None;
return AttrValue;
}
+} // namespace llvm
+
+namespace {
+class InlineCostCallAnalyzer;
// This struct is used to store information about inline cost of a
// particular instruction
@@ -904,6 +908,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
Cost = *AttrCost;
+ if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
+ CandidateCall,
+ InlineConstants::FunctionInlineCostMultiplierAttributeName))
+ Cost *= *AttrCostMult;
+
if (Optional<int> AttrThreshold =
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82c..10abea7ebd321 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -92,6 +93,18 @@ static cl::opt<bool>
DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
cl::init(false), cl::Hidden);
+static cl::opt<int> IntraSCCCostMultiplier(
+ "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+ cl::desc(
+ "Cost multiplier to multiply onto inlined call sites where the "
+ "new call was previously an intra-SCC call (not relevant when the "
+ "original call was already intra-SCC). This can accumulate over "
+ "multiple inlinings (e.g. if a call site already had a cost "
+ "multiplier and one of its inlined calls was also subject to "
+ "this, the inlined call would have the original multiplier "
+ "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+ "inlining through a child SCC which can cause terrible compile times"));
+
/// A flag for test, so we can print the content of the advisor when running it
/// as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
@@ -876,8 +889,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// trigger infinite inlining, much like is prevented within the inliner
// itself by the InlineHistory above, but spread across CGSCC iterations
// and thus hidden from the full inline history.
- if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
- UR.InlinedInternalEdges.count({&N, C})) {
+ LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+ if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
"previously split out of this SCC by inlining: "
<< F.getName() << " -> " << Callee.getName() << "\n");
@@ -897,6 +910,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
+ int CBCostMult =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+ .getValueOr(1);
+
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
@@ -935,9 +953,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (tryPromoteCall(*ICB))
NewCallee = ICB->getCalledFunction();
}
- if (NewCallee)
- if (!NewCallee->isDeclaration())
+ if (NewCallee) {
+ if (!NewCallee->isDeclaration()) {
Calls->push({ICB, NewHistoryID});
+ // Continually inlining through an SCC can result in huge compile
+ // times and bloated code since we arbitrarily stop at some point
+ // when the inliner decides it's not profitable to inline anymore.
+ // We attempt to mitigate this by making these calls exponentially
+ // more expensive.
+ // This doesn't apply to calls in the same SCC since if we do
+ // inline through the SCC the function will end up being
+ // self-recursive which the inliner bails out on, and inlining
+ // within an SCC is necessary for performance.
+ if (CalleeSCC != C &&
+ CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+ Attribute NewCBCostMult = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineCostMultiplierAttributeName,
+ itostr(CBCostMult * IntraSCCCostMultiplier));
+ ICB->addFnAttr(NewCBCostMult);
+ }
+ }
+ }
}
}
diff --git a/llvm/test/Transforms/Inline/inline-cost-attributes.ll b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
index b412a5fac0c44..71264ab6c389f 100644
--- a/llvm/test/Transforms/Inline/inline-cost-attributes.ll
+++ b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
@@ -11,8 +11,9 @@ entry:
define void @fn2() "function-inline-threshold"="41" {
; INLINER-LABEL: Inlining calls in: fn2
-; INLINER-NEXT: Function size: 6
+; INLINER-NEXT: Function size: 7
; INLINER-NEXT: NOT Inlining (cost=321, threshold=123), Call: call void @fn1()
+; INLINER-NEXT: NOT Inlining (cost=963, threshold=123), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=321, threshold=321), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=197, threshold=123), Call: call void @fn1()
; INLINER-NEXT: Inlining (cost=197, threshold=321), Call: call void @fn1()
@@ -23,6 +24,8 @@ define void @fn2() "function-inline-threshold"="41" {
; COST-NEXT: call void @extern()
; COST-NEXT: cost delta = 132, threshold delta = 193
; COST-NEXT: call void @fn1()
+; COST-NEXT: cost delta = 132, threshold delta = 193
+; COST-NEXT: call void @fn1()
; COST-NEXT: cost delta = 0
; COST-NEXT: call void @fn1()
; COST-NEXT: cost delta = 271, threshold delta = 17
@@ -33,6 +36,7 @@ define void @fn2() "function-inline-threshold"="41" {
entry:
call void @extern()
call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193"
+ call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193" "function-inline-cost-multiplier"="3"
call void @fn1() "call-inline-cost"="0" "function-inline-threshold"="321"
call void @fn1() "call-threshold-bonus"="17" "function-inline-cost"="197"
call void @fn1() "call-inline-cost"="473" "function-inline-cost"="197" "function-inline-threshold"="321"
@@ -44,7 +48,7 @@ define void @fn3() {
; INLINER-NEXT: Function size: 3
; INLINER-NEXT: Inlining (cost=386, threshold=849), Call: call void @fn1()
; INLINER-NEXT: Size after inlining: 2
-; INLINER-NEXT: NOT Inlining (cost=403, threshold=41), Call: call void @fn2()
+; INLINER-NEXT: NOT Inlining (cost=535, threshold=41), Call: call void @fn2()
entry:
call void @fn1() "function-inline-cost"="386" "function-inline-threshold"="849"
diff --git a/llvm/test/Transforms/Inline/mut-rec-scc-2.ll b/llvm/test/Transforms/Inline/mut-rec-scc-2.ll
new file mode 100644
index 0000000000000..df7a6aa5fc71c
--- /dev/null
+++ b/llvm/test/Transforms/Inline/mut-rec-scc-2.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -passes='inline' < %s | FileCheck %s
+
+; Make sure we don't add the inline cost multiplier attribute to calls within the same SCC as the original function.
+; CHECK-NOT: function-inline-cost-multiplier
+
+define void @samescc1() {
+ call void @samescc2()
+ ret void
+}
+
+define void @samescc2() {
+ call void @samescc3()
+ ret void
+}
+
+define void @samescc3() {
+ call void @samescc1()
+ ret void
+}
diff --git a/llvm/test/Transforms/Inline/mut-rec-scc.ll b/llvm/test/Transforms/Inline/mut-rec-scc.ll
new file mode 100644
index 0000000000000..f7336a731f6e0
--- /dev/null
+++ b/llvm/test/Transforms/Inline/mut-rec-scc.ll
@@ -0,0 +1,75 @@
+; RUN: opt -S -passes='cgscc(inline,instcombine)' < %s | FileCheck %s
+; RUN: opt -S -intra-scc-cost-multiplier=3 -passes='cgscc(inline,instcombine)' < %s | FileCheck %s --check-prefix=THREE
+
+; We use calls to a dummy function to avoid inlining test1 into test2 or vice
+; versa, such that we aren't left with a trivial cycle, as trivial cycles are
+; special-cased to never be inlined.
+; However, InstCombine will eliminate these calls after inlining, and thus
+; make the functions eligible for inlining in their callers.
+declare void @dummy() readnone nounwind willreturn
+
+define void @test1() {
+; CHECK-LABEL: define void @test1(
+; CHECK-NEXT: call void @test2()
+; CHECK-NEXT: call void @test2()
+; CHECK-NEXT: ret void
+;
+ call void @test2()
+ call void @test2()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: define void @test2(
+; CHECK-NEXT: call void @test1()
+; CHECK-NEXT: call void @test1()
+; CHECK-NEXT: ret void
+;
+ call void @test1()
+ call void @test1()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ call void @dummy()
+ ret void
+}
+
+; The inlined call sites should have the "function-inline-cost-multiplier" call site attribute.
+; This test is a bit fragile: the exact number of inlinings that happen depends on thresholds.
+define void @test3() {
+; CHECK-LABEL: define void @test3(
+; CHECK-NEXT: call void @test2() #[[COSTMULT:[0-9]+]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: call void @test2() #[[COSTMULT]]
+; CHECK-NEXT: ret void
+;
+ call void @test2()
+ call void @test2()
+ ret void
+}
+
+; CHECK: [[COSTMULT]] = { "function-inline-cost-multiplier"="4" }
+; THREE: "function-inline-cost-multiplier"="9"
More information about the llvm-branch-commits
mailing list