[llvm] [Inline] Accumulate the cost of the inlined function to the new call site (PR #111104)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 4 00:21:56 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-analysis

Author: DianQK (DianQK)

<details>
<summary>Changes</summary>

Fixes #<!-- -->111102.

Consider the following IR, the `call void @<!-- -->bar` is a local host call site:

```llvm
@<!-- -->define void foo() {
  ...
loop:
  call void @<!-- -->bar()
  ...
}

define void @<!-- -->bar() {
  ...
  call void @<!-- -->baz()
  ...
}

define void @<!-- -->baz() {
  ...
}
```

If `foo` can inline `baz` after inlined `bar`, I think it can also inline a `bar` that has inlined `baz`. With this in mind, I accumulated the previous cost into the new call site, which is a linearly increasing limit.

I did some experimenting on [DianQK/perf/inline-extra-cost](https://llvm-compile-time-tracker.com/?config=Overview&stat=instructions%3Au&remote=DianQK) and the current thresholds look OK. I'm looking forward to getting some feedback before modifying this threshold to an option.

---
Full diff: https://github.com/llvm/llvm-project/pull/111104.diff


8 Files Affected:

- (modified) llvm/include/llvm/Analysis/InlineAdvisor.h (+9-3) 
- (modified) llvm/include/llvm/Analysis/InlineCost.h (+3) 
- (modified) llvm/lib/Analysis/InlineAdvisor.cpp (+4-2) 
- (modified) llvm/lib/Analysis/InlineCost.cpp (+5) 
- (modified) llvm/lib/Transforms/IPO/Inliner.cpp (+15) 
- (modified) llvm/lib/Transforms/IPO/ModuleInliner.cpp (+18-1) 
- (modified) llvm/test/Transforms/Inline/inline-history-noinline.ll (+1-1) 
- (added) llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll (+109) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 871a6e97861e29..2880553817ebd8 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -74,7 +74,8 @@ class InlineAdvisor;
 class InlineAdvice {
 public:
   InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
-               OptimizationRemarkEmitter &ORE, bool IsInliningRecommended);
+               OptimizationRemarkEmitter &ORE, bool IsInliningRecommended,
+               std::optional<int> InliningCost = std::nullopt);
 
   InlineAdvice(InlineAdvice &&) = delete;
   InlineAdvice(const InlineAdvice &) = delete;
@@ -108,6 +109,7 @@ class InlineAdvice {
 
   /// Get the inlining recommendation.
   bool isInliningRecommended() const { return IsInliningRecommended; }
+  std::optional<int> inliningCost() const { return InliningCost; }
   const DebugLoc &getOriginalCallSiteDebugLoc() const { return DLoc; }
   const BasicBlock *getOriginalCallSiteBasicBlock() const { return Block; }
 
@@ -129,6 +131,7 @@ class InlineAdvice {
   const BasicBlock *const Block;
   OptimizationRemarkEmitter &ORE;
   const bool IsInliningRecommended;
+  const std::optional<int> InliningCost;
 
 private:
   void markRecorded() {
@@ -145,8 +148,11 @@ class DefaultInlineAdvice : public InlineAdvice {
   DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
                       std::optional<InlineCost> OIC,
                       OptimizationRemarkEmitter &ORE, bool EmitRemarks = true)
-      : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB),
-        OIC(OIC), EmitRemarks(EmitRemarks) {}
+      : InlineAdvice(Advisor, CB, ORE, OIC.has_value(),
+                     OIC && OIC->isVariable()
+                         ? std::optional<int>(OIC->getCost())
+                         : std::nullopt),
+        OriginalCB(&CB), OIC(OIC), EmitRemarks(EmitRemarks) {}
 
 private:
   void recordUnsuccessfulInliningImpl(const InlineResult &Result) override;
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index c5978ce54fc18b..1190308dba4193 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -58,6 +58,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
 
 const char FunctionInlineCostMultiplierAttributeName[] =
     "function-inline-cost-multiplier";
+/// Cost of call site accumulation added after inlining.
+const char FunctionInlineAdditionalCostAttributeName[] =
+    "function-inline-additional-cost";
 
 const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize";
 } // namespace InlineConstants
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index c6907cb128bb47..9706dd212d7bc5 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -175,10 +175,12 @@ DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
 
 InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
                            OptimizationRemarkEmitter &ORE,
-                           bool IsInliningRecommended)
+                           bool IsInliningRecommended,
+                           std::optional<int> InliningCost)
     : Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()),
       DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE),
-      IsInliningRecommended(IsInliningRecommended) {}
+      IsInliningRecommended(IsInliningRecommended), InliningCost(InliningCost) {
+}
 
 void InlineAdvice::recordInlineStatsIfNeeded() {
   if (Advisor->ImportedFunctionsStats)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d2c329ba748e58..e45423f2130d8a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1017,6 +1017,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
             InlineConstants::FunctionInlineCostMultiplierAttributeName))
       Cost *= *AttrCostMult;
 
+    if (std::optional<int> AttrAdditonalCost = getStringFnAttrAsInt(
+            CandidateCall,
+            InlineConstants::FunctionInlineAdditionalCostAttributeName))
+      Cost += *AttrAdditonalCost;
+
     if (std::optional<int> AttrThreshold =
             getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
       Threshold = *AttrThreshold;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 23ee23eb047f58..977bbcd35f73e8 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -376,6 +376,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
           getStringFnAttrAsInt(
               *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
               .value_or(1);
+      int CBInliningAdditionalCost =
+          getStringFnAttrAsInt(
+              *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+              .value_or(0);
+      std::optional<int> InliningCost = Advice->inliningCost();
 
       // Setup the data structure used to plumb customization into the
       // `InlineFunction` routine.
@@ -435,6 +440,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                     InlineConstants::FunctionInlineCostMultiplierAttributeName,
                     itostr(CBCostMult * IntraSCCCostMultiplier));
                 ICB->addFnAttr(NewCBCostMult);
+              } else if (InliningCost && *InliningCost > 0) {
+                // Similar to hot call site thresholds that can cause Inliner to
+                // inline numerous functions causing compile time issues, a
+                // linear accumulator was created to mitigate the problem.
+                Attribute NewCBAdditionalCost = Attribute::get(
+                    M.getContext(),
+                    InlineConstants::FunctionInlineAdditionalCostAttributeName,
+                    itostr(CBInliningAdditionalCost +
+                           (*InliningCost - CBInliningAdditionalCost) / 16));
+                ICB->addFnAttr(NewCBAdditionalCost);
               }
             }
           }
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index dbc733826944b9..c196be9d6dd163 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -225,6 +225,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
       Advice->recordUnattemptedInlining();
       continue;
     }
+    int CBInliningAdditionalCost =
+        getStringFnAttrAsInt(
+            *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+            .value_or(0);
+    std::optional<int> InliningCost = Advice->inliningCost();
 
     // Setup the data structure used to plumb customization into the
     // `InlineFunction` routine.
@@ -265,8 +270,20 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
               NewCallee = ICB->getCalledFunction();
         }
         if (NewCallee)
-          if (!NewCallee->isDeclaration())
+          if (!NewCallee->isDeclaration()) {
             Calls->push({ICB, NewHistoryID});
+            if (InliningCost && *InliningCost > 0) {
+              // Similar to hot call site thresholds that can cause Inliner to
+              // inline numerous functions causing compile time issues, a linear
+              // accumulator was created to mitigate the problem.
+              Attribute NewCBAdditionalCost = Attribute::get(
+                  M.getContext(),
+                  InlineConstants::FunctionInlineAdditionalCostAttributeName,
+                  itostr(CBInliningAdditionalCost +
+                         (*InliningCost - CBInliningAdditionalCost) / 16));
+              ICB->addFnAttr(NewCBAdditionalCost);
+            }
+          }
       }
     }
 
diff --git a/llvm/test/Transforms/Inline/inline-history-noinline.ll b/llvm/test/Transforms/Inline/inline-history-noinline.ll
index 742bd25ecd9bb9..fbe633fc3c797b 100644
--- a/llvm/test/Transforms/Inline/inline-history-noinline.ll
+++ b/llvm/test/Transforms/Inline/inline-history-noinline.ll
@@ -29,4 +29,4 @@ define internal void @a() {
   ret void
 }
 
-; CHECK: [[NOINLINE]] = { noinline }
+; CHECK: [[NOINLINE]] = { noinline {{.*}}}
diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
new file mode 100644
index 00000000000000..a1730d76cd547c
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; This tests that a hot callsite gets the (higher) inlinehint-threshold even without
+; without inline hints and gets inlined because the cost is less than
+; RUN: opt < %s -passes=inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+; RUN: opt < %s -passes=module-inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+
+; Due to the hot call site, foo0 inlined foo1, foo2, and foo3,
+; but foo4 is not inlined due to the accumulated cost.
+
+declare void @bar(ptr)
+
+define void @foo0(ptr %p) {
+; CHECK-LABEL: define void @foo0(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[HEADER:.*:]]
+; CHECK-NEXT:    [[I_I2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[I_I1:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[I_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT:    call void @bar(ptr [[I_I]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT:    call void @bar(ptr [[I_I1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT:    call void @bar(ptr [[I_I2]])
+; CHECK-NEXT:    call void @foo4(ptr [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT:    br label %[[LOOP]]
+;
+header:
+  br label %loop
+
+loop:
+  call void @foo1(ptr %p)
+  br label %loop
+}
+
+define void @foo1(ptr %p) {
+; CHECK-LABEL: define void @foo1(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @bar(ptr [[I]])
+; CHECK-NEXT:    call void @foo2(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  %i = alloca i32
+  call void @bar(ptr %i)
+  call void @foo2(ptr %p)
+  ret void
+}
+
+define void @foo2(ptr %p) {
+; CHECK-LABEL: define void @foo2(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @bar(ptr [[I]])
+; CHECK-NEXT:    call void @foo3(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  %i = alloca i32
+  call void @bar(ptr %i)
+  call void @foo3(ptr %p)
+  ret void
+}
+
+define void @foo3(ptr %p) {
+; CHECK-LABEL: define void @foo3(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @bar(ptr [[I]])
+; CHECK-NEXT:    call void @foo4(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  %i = alloca i32
+  call void @bar(ptr %i)
+  call void @foo4(ptr %p)
+  ret void
+}
+
+define void @foo4(ptr %p) {
+; CHECK-LABEL: define void @foo4(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @bar(ptr [[I]])
+; CHECK-NEXT:    call void @foo5(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  %i = alloca i32
+  call void @bar(ptr %i)
+  call void @foo5(ptr %p)
+  ret void
+}
+
+define void @foo5(ptr %p) {
+; CHECK-LABEL: define void @foo5(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @bar(ptr [[I]])
+; CHECK-NEXT:    call void @bar(ptr [[I]])
+; CHECK-NEXT:    ret void
+;
+  %i = alloca i32
+  call void @bar(ptr %i)
+  call void @bar(ptr %i)
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/111104


More information about the llvm-commits mailing list