[llvm] aa6ee03 - [NFC][Inliner] Introduce another multiplier for cost benefit analysis and make multipliers overridable in TargetTransformInfo.
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 2 21:27:24 PDT 2023
Author: Mingming Liu
Date: 2023-10-02T21:27:07-07:00
New Revision: aa6ee03709521a5ecb46f54a77eccc6fca277f28
URL: https://github.com/llvm/llvm-project/commit/aa6ee03709521a5ecb46f54a77eccc6fca277f28
DIFF: https://github.com/llvm/llvm-project/commit/aa6ee03709521a5ecb46f54a77eccc6fca277f28.diff
LOG: [NFC][Inliner] Introduce another multiplier for cost benefit analysis and make multipliers overridable in TargetTransformInfo.
- The motivation is to expose tunable knobs that control the aggressiveness of inlining for different backends (e.g., machines with different icache sizes, and workloads with different icache/iTLB PMU counters). Tuning inline aggressiveness shows a small (~+0.3%) but stable improvement on workloads/hardware that are more frontend bound.
- Both multipliers can be overridden from the command line.
Reviewed By: kazu
Differential Revision: https://reviews.llvm.org/D153154
Added:
llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/lib/Analysis/InlineCost.cpp
llvm/lib/Analysis/TargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1ae595d2110457d..5234ef8788d9e96 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -348,6 +348,9 @@ class TargetTransformInfo {
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
+ unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
+ unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
+
/// \returns A value to be added to the inlining threshold.
unsigned adjustInliningThreshold(const CallBase *CB) const;
@@ -1696,6 +1699,9 @@ class TargetTransformInfo::Concept {
const TTI::PointersChainInfo &Info, Type *AccessTy,
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() const = 0;
+ virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
+ virtual unsigned
+ getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() const = 0;
virtual unsigned getCallerAllocaCost(const CallBase *CB,
@@ -2068,6 +2074,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned adjustInliningThreshold(const CallBase *CB) override {
return Impl.adjustInliningThreshold(CB);
}
+ unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
+ return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
+ }
+ unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
+ return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
+ }
int getInlinerVectorBonusPercent() const override {
return Impl.getInlinerVectorBonusPercent();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 326c3130c6cff76..c1ff314ae51c98b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -69,6 +69,10 @@ class TargetTransformInfoImplBase {
}
unsigned getInliningThresholdMultiplier() const { return 1; }
+ unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
+ unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
+ return 8;
+ }
unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
return 0;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 4fcf5575c74b0a4..fa0c30637633df3 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -88,10 +88,21 @@ static cl::opt<bool> InlineEnableCostBenefitAnalysis(
"inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
cl::desc("Enable the cost-benefit analysis for the inliner"));
+// InlineSavingsMultiplier overrides per TTI multipliers iff it is
+// specified explicitly in command line options. This option is exposed
+// for tuning and testing.
static cl::opt<int> InlineSavingsMultiplier(
"inline-savings-multiplier", cl::Hidden, cl::init(8),
cl::desc("Multiplier to multiply cycle savings by during inlining"));
+// InlineSavingsProfitableMultiplier overrides per TTI multipliers iff it is
+// specified explicitly in command line options. This option is exposed
+// for tuning and testing.
+static cl::opt<int> InlineSavingsProfitableMultiplier(
+ "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4),
+ cl::desc("A multiplier on top of cycle savings to decide whether the "
+ "savings won't justify the cost"));
+
static cl::opt<int>
InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
cl::desc("The maximum size of a callee that get's "
@@ -815,6 +826,32 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
return true;
}
+ // A helper function to choose between command line override and default.
+ unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
+ if (InlineSavingsMultiplier.getNumOccurrences())
+ return InlineSavingsMultiplier;
+ return TTI.getInliningCostBenefitAnalysisSavingsMultiplier();
+ }
+
+ // A helper function to choose between command line override and default.
+ unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
+ if (InlineSavingsProfitableMultiplier.getNumOccurrences())
+ return InlineSavingsProfitableMultiplier;
+ return TTI.getInliningCostBenefitAnalysisProfitableMultiplier();
+ }
+
+ void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {
+ if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt(
+ CandidateCall, "inline-cycle-savings-for-test")) {
+ CycleSavings = *AttrCycleSavings;
+ }
+
+ if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt(
+ CandidateCall, "inline-runtime-cost-for-test")) {
+ Size = *AttrRuntimeCost;
+ }
+ }
+
// Determine whether we should inline the given call site, taking into account
// both the size cost and the cycle savings. Return std::nullopt if we don't
// have sufficient profiling information to determine.
@@ -884,29 +921,55 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
CycleSavings += getCallsiteCost(this->CandidateCall, DL);
CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
- // Remove the cost of the cold basic blocks.
+ // Remove the cost of the cold basic blocks to model the runtime cost more
+ // accurately. Both machine block placement and function splitting could
+ // place cold blocks further from hot blocks.
int Size = Cost - ColdSize;
// Allow tiny callees to be inlined regardless of whether they meet the
// savings threshold.
Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;
+ OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);
CostBenefit.emplace(APInt(128, Size), CycleSavings);
- // Return true if the savings justify the cost of inlining. Specifically,
- // we evaluate the following inequality:
+ // Let R be the ratio of CycleSavings to Size. We accept the inlining
+ // opportunity if R is really high and reject if R is really low. If R is
+ // somewhere in the middle, we fall back to the cost-based analysis.
//
- // CycleSavings PSI->getOrCompHotCountThreshold()
- // -------------- >= -----------------------------------
- // Size InlineSavingsMultiplier
+ // Specifically, let R = CycleSavings / Size, we accept the inlining
+ // opportunity if:
//
- // Note that the left hand side is specific to a call site. The right hand
- // side is a constant for the entire executable.
- APInt LHS = CycleSavings;
- LHS *= InlineSavingsMultiplier;
- APInt RHS(128, PSI->getOrCompHotCountThreshold());
- RHS *= Size;
- return LHS.uge(RHS);
+ // PSI->getOrCompHotCountThreshold()
+ // R >= -------------------------------------------------
+ // getInliningCostBenefitAnalysisSavingsMultiplier()
+ //
+ // and reject the inlining opportunity if:
+ //
+ // PSI->getOrCompHotCountThreshold()
+ // R < ----------------------------------------------------
+ // getInliningCostBenefitAnalysisProfitableMultiplier()
+ //
+ // Otherwise, we fall back to the cost-based analysis.
+ //
+ // Implementation-wise, use multiplication (CycleSavings * Multiplier,
+ // HotCountThreshold * Size) rather than division to avoid precision loss.
+ APInt Threshold(128, PSI->getOrCompHotCountThreshold());
+ Threshold *= Size;
+
+ APInt UpperBoundCycleSavings = CycleSavings;
+ UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
+ if (UpperBoundCycleSavings.uge(Threshold))
+ return true;
+
+ APInt LowerBoundCycleSavings = CycleSavings;
+ LowerBoundCycleSavings *=
+ getInliningCostBenefitAnalysisProfitableMultiplier();
+ if (LowerBoundCycleSavings.ult(Threshold))
+ return false;
+
+ // Otherwise, fall back to the cost-based analysis.
+ return std::nullopt;
}
InlineResult finalizeAnalysis() override {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c751d174a48ab1f..aad14f21d114619 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -212,6 +212,17 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
return TTIImpl->getInliningThresholdMultiplier();
}
+unsigned
+TargetTransformInfo::getInliningCostBenefitAnalysisSavingsMultiplier() const {
+ return TTIImpl->getInliningCostBenefitAnalysisSavingsMultiplier();
+}
+
+unsigned
+TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
+ const {
+ return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
+}
+
unsigned
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
return TTIImpl->adjustInliningThreshold(CB);
diff --git a/llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll b/llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll
new file mode 100644
index 000000000000000..d367b74358fa1c5
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -pass-remarks=inline -pass-remarks-missed=inline -inline-savings-multiplier=4 -inline-savings-profitable-multiplier=5 -S 2>&1| FileCheck %s
+
+; Test that the inline cost-benefit multipliers can be configured from the command line.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; @inlined_callee is inlined by cost-benefit analysis.
+; @not_inlined_callee is not inlined, decided by cost-benefit analysis
+; CHECK: remark: <unknown>:0:0: 'inlined_callee' inlined into 'caller' with (cost=always): benefit over cost
+; CHECK: remark: <unknown>:0:0: 'not_inlined_callee' not inlined into 'caller' because it should never be inlined (cost=never): cost over benefit
+
+define i32 @inlined_callee(i32 %c) !prof !17 {
+entry:
+ %mul = mul nsw i32 %c, %c
+ ret i32 %mul
+}
+
+define i32 @not_inlined_callee(i32 %c) !prof !18 {
+entry:
+ %add = add nsw i32 %c, 2
+ ret i32 %add
+}
+
+define i32 @caller(i32 %a, i32 %c) !prof !15 {
+entry:
+ %rem = srem i32 %a, 3
+ %cmp = icmp eq i32 %rem, 0
+ br i1 %cmp, label %if.then, label %if.end, !prof !16
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: call i32 @inlined_callee
+ %call = tail call i32 @inlined_callee(i32 %c) "inline-cycle-savings-for-test"="26" "inline-runtime-cost-for-test"="1"
+ br label %return
+
+if.end:
+; CHECK-LABEL: if.end:
+; CHECK: call i32 @not_inlined_callee
+ %call1 = tail call i32 @not_inlined_callee(i32 %c) "inline-cycle-savings-for-test"="19" "inline-runtime-cost-for-test"="1"
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.end ]
+ ret i32 %retval.0
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 990000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 500}
+!16 = !{!"branch_weights", i32 1, i32 2}
+!17 = !{!"function_entry_count", i64 200}
+!18 = !{!"function_entry_count", i64 400}
More information about the llvm-commits
mailing list