[llvm] aa6ee03 - [NFC][Inliner] Introduce another multiplier for cost benefit analysis and make multipliers overriddable in TargetTransformInfo.

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 2 21:27:24 PDT 2023


Author: Mingming Liu
Date: 2023-10-02T21:27:07-07:00
New Revision: aa6ee03709521a5ecb46f54a77eccc6fca277f28

URL: https://github.com/llvm/llvm-project/commit/aa6ee03709521a5ecb46f54a77eccc6fca277f28
DIFF: https://github.com/llvm/llvm-project/commit/aa6ee03709521a5ecb46f54a77eccc6fca277f28.diff

LOG: [NFC][Inliner] Introduce another multiplier for cost benefit analysis and make multipliers overriddable in TargetTransformInfo.

- The motivation is to expose tunable knobs to control the aggressiveness of inlining for different backends (e.g., machines with different icache sizes, and workloads with different icache/iTLB PMU counters). Tuning inlining aggressiveness shows a small (~+0.3%) but stable improvement on workloads/hardware that are more frontend-bound.
- Both multipliers can be overridden from the command line.

Reviewed By: kazu

Differential Revision: https://reviews.llvm.org/D153154

Added: 
    llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/lib/Analysis/InlineCost.cpp
    llvm/lib/Analysis/TargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1ae595d2110457d..5234ef8788d9e96 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -348,6 +348,9 @@ class TargetTransformInfo {
   /// individual classes of instructions would be better.
   unsigned getInliningThresholdMultiplier() const;
 
+  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
+  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
+
   /// \returns A value to be added to the inlining threshold.
   unsigned adjustInliningThreshold(const CallBase *CB) const;
 
@@ -1696,6 +1699,9 @@ class TargetTransformInfo::Concept {
                        const TTI::PointersChainInfo &Info, Type *AccessTy,
                        TTI::TargetCostKind CostKind) = 0;
   virtual unsigned getInliningThresholdMultiplier() const = 0;
+  virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
+  virtual unsigned
+  getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
   virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
   virtual int getInlinerVectorBonusPercent() const = 0;
   virtual unsigned getCallerAllocaCost(const CallBase *CB,
@@ -2068,6 +2074,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   unsigned adjustInliningThreshold(const CallBase *CB) override {
     return Impl.adjustInliningThreshold(CB);
   }
+  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
+    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
+  }
+  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
+    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
+  }
   int getInlinerVectorBonusPercent() const override {
     return Impl.getInlinerVectorBonusPercent();
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 326c3130c6cff76..c1ff314ae51c98b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -69,6 +69,10 @@ class TargetTransformInfoImplBase {
   }
 
   unsigned getInliningThresholdMultiplier() const { return 1; }
+  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
+  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
+    return 8;
+  }
   unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
   unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
     return 0;

diff  --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 4fcf5575c74b0a4..fa0c30637633df3 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -88,10 +88,21 @@ static cl::opt<bool> InlineEnableCostBenefitAnalysis(
     "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
     cl::desc("Enable the cost-benefit analysis for the inliner"));
 
+// InlineSavingsMultiplier overrides the per-TTI multiplier iff it is
+// specified explicitly on the command line. This option is exposed
+// for tuning and testing.
 static cl::opt<int> InlineSavingsMultiplier(
     "inline-savings-multiplier", cl::Hidden, cl::init(8),
     cl::desc("Multiplier to multiply cycle savings by during inlining"));
 
+// InlineSavingsProfitableMultiplier overrides the per-TTI multiplier iff it
+// is specified explicitly on the command line. This option is exposed
+// for tuning and testing.
+static cl::opt<int> InlineSavingsProfitableMultiplier(
+    "inline-savings-profitable-multiplier", cl::Hidden, cl::init(4),
+    cl::desc("A multiplier on top of cycle savings to decide whether the "
+             "savings won't justify the cost"));
+
 static cl::opt<int>
     InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                         cl::desc("The maximum size of a callee that get's "
@@ -815,6 +826,32 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
     return true;
   }
 
+  // A helper function to choose between command line override and default.
+  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
+    if (InlineSavingsMultiplier.getNumOccurrences())
+      return InlineSavingsMultiplier;
+    return TTI.getInliningCostBenefitAnalysisSavingsMultiplier();
+  }
+
+  // A helper function to choose between command line override and default.
+  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
+    if (InlineSavingsProfitableMultiplier.getNumOccurrences())
+      return InlineSavingsProfitableMultiplier;
+    return TTI.getInliningCostBenefitAnalysisProfitableMultiplier();
+  }
+
+  void OverrideCycleSavingsAndSizeForTesting(APInt &CycleSavings, int &Size) {
+    if (std::optional<int> AttrCycleSavings = getStringFnAttrAsInt(
+            CandidateCall, "inline-cycle-savings-for-test")) {
+      CycleSavings = *AttrCycleSavings;
+    }
+
+    if (std::optional<int> AttrRuntimeCost = getStringFnAttrAsInt(
+            CandidateCall, "inline-runtime-cost-for-test")) {
+      Size = *AttrRuntimeCost;
+    }
+  }
+
   // Determine whether we should inline the given call site, taking into account
   // both the size cost and the cycle savings.  Return std::nullopt if we don't
   // have sufficient profiling information to determine.
@@ -884,29 +921,55 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
     CycleSavings += getCallsiteCost(this->CandidateCall, DL);
     CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
 
-    // Remove the cost of the cold basic blocks.
+    // Remove the cost of the cold basic blocks to model the runtime cost more
+    // accurately. Both machine block placement and function splitting could
+    // place cold blocks further from hot blocks.
     int Size = Cost - ColdSize;
 
     // Allow tiny callees to be inlined regardless of whether they meet the
     // savings threshold.
     Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;
 
+    OverrideCycleSavingsAndSizeForTesting(CycleSavings, Size);
     CostBenefit.emplace(APInt(128, Size), CycleSavings);
 
-    // Return true if the savings justify the cost of inlining.  Specifically,
-    // we evaluate the following inequality:
+    // Let R be the ratio of CycleSavings to Size.  We accept the inlining
+    // opportunity if R is really high and reject if R is really low.  If R is
+    // somewhere in the middle, we fall back to the cost-based analysis.
     //
-    //  CycleSavings      PSI->getOrCompHotCountThreshold()
-    // -------------- >= -----------------------------------
-    //       Size              InlineSavingsMultiplier
+    // Specifically, let R = CycleSavings / Size, we accept the inlining
+    // opportunity if:
     //
-    // Note that the left hand side is specific to a call site.  The right hand
-    // side is a constant for the entire executable.
-    APInt LHS = CycleSavings;
-    LHS *= InlineSavingsMultiplier;
-    APInt RHS(128, PSI->getOrCompHotCountThreshold());
-    RHS *= Size;
-    return LHS.uge(RHS);
+    //              PSI->getOrCompHotCountThreshold()
+    // R >= -------------------------------------------------
+    //      getInliningCostBenefitAnalysisSavingsMultiplier()
+    //
+    // and reject the inlining opportunity if:
+    //
+    //               PSI->getOrCompHotCountThreshold()
+    // R < ----------------------------------------------------
+    //     getInliningCostBenefitAnalysisProfitableMultiplier()
+    //
+    // Otherwise, we fall back to the cost-based analysis.
+    //
+    // Implementation-wise, use multiplication (CycleSavings * Multiplier,
+    // HotCountThreshold * Size) rather than division to avoid precision loss.
+    APInt Threshold(128, PSI->getOrCompHotCountThreshold());
+    Threshold *= Size;
+
+    APInt UpperBoundCycleSavings = CycleSavings;
+    UpperBoundCycleSavings *= getInliningCostBenefitAnalysisSavingsMultiplier();
+    if (UpperBoundCycleSavings.uge(Threshold))
+      return true;
+
+    APInt LowerBoundCycleSavings = CycleSavings;
+    LowerBoundCycleSavings *=
+        getInliningCostBenefitAnalysisProfitableMultiplier();
+    if (LowerBoundCycleSavings.ult(Threshold))
+      return false;
+
+    // Otherwise, fall back to the cost-based analysis.
+    return std::nullopt;
   }
 
   InlineResult finalizeAnalysis() override {

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c751d174a48ab1f..aad14f21d114619 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -212,6 +212,17 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
   return TTIImpl->getInliningThresholdMultiplier();
 }
 
+unsigned
+TargetTransformInfo::getInliningCostBenefitAnalysisSavingsMultiplier() const {
+  return TTIImpl->getInliningCostBenefitAnalysisSavingsMultiplier();
+}
+
+unsigned
+TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
+    const {
+  return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
+}
+
 unsigned
 TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
   return TTIImpl->adjustInliningThreshold(CB);

diff  --git a/llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll b/llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll
new file mode 100644
index 000000000000000..d367b74358fa1c5
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-cost-benefit-multiplier-override.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -pass-remarks=inline -pass-remarks-missed=inline -inline-savings-multiplier=4 -inline-savings-profitable-multiplier=5 -S 2>&1| FileCheck %s
+
+; Test that the inline cost-benefit multipliers can be configured from the command line.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; @inlined_callee is inlined by cost-benefit analysis.
+; @not_inlined_callee is not inlined, decided by cost-benefit analysis
+; CHECK: remark: <unknown>:0:0: 'inlined_callee' inlined into 'caller' with (cost=always): benefit over cost
+; CHECK: remark: <unknown>:0:0: 'not_inlined_callee' not inlined into 'caller' because it should never be inlined (cost=never): cost over benefit
+
+define i32 @inlined_callee(i32 %c) !prof !17 {
+entry:
+  %mul = mul nsw i32 %c, %c
+  ret i32 %mul
+}
+
+define i32 @not_inlined_callee(i32 %c) !prof !18 {
+entry:
+  %add = add nsw i32 %c, 2
+  ret i32 %add
+}
+
+define i32 @caller(i32 %a, i32 %c)  !prof !15 {
+entry:
+  %rem = srem i32 %a, 3
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %if.then, label %if.end, !prof !16
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: call i32 @inlined_callee
+  %call = tail call i32 @inlined_callee(i32 %c) "inline-cycle-savings-for-test"="26" "inline-runtime-cost-for-test"="1"
+  br label %return
+
+if.end:
+; CHECK-LABEL: if.end:
+; CHECK: call i32 @not_inlined_callee
+  %call1 = tail call i32 @not_inlined_callee(i32 %c) "inline-cycle-savings-for-test"="19" "inline-runtime-cost-for-test"="1"
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.end ]
+  ret i32 %retval.0
+}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 990000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 500}
+!16 = !{!"branch_weights", i32 1, i32 2}
+!17 = !{!"function_entry_count", i64 200}
+!18 = !{!"function_entry_count", i64 400}


        


More information about the llvm-commits mailing list