[llvm] a740b70 - [llvm][Inliner] Add an optional PriorityInlineOrder
Liqiang Tao via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 18 02:00:39 PDT 2021
Author: Liqiang Tao
Date: 2021-06-18T16:55:38+08:00
New Revision: a740b707d1937621dcf56579001bcad87b81724f
URL: https://github.com/llvm/llvm-project/commit/a740b707d1937621dcf56579001bcad87b81724f
DIFF: https://github.com/llvm/llvm-project/commit/a740b707d1937621dcf56579001bcad87b81724f.diff
LOG: [llvm][Inliner] Add an optional PriorityInlineOrder
This patch adds an optional PriorityInlineOrder, which uses a heap to order inlining.
Call sites whose callee is smaller get a higher priority.
Reviewed By: mtrofin
Differential Revision: https://reviews.llvm.org/D104028
Added:
Modified:
llvm/lib/Transforms/IPO/Inliner.cpp
llvm/test/Transforms/Inline/inline_call.ll
llvm/test/Transforms/Inline/inline_invoke.ll
llvm/test/Transforms/Inline/last-callsite.ll
llvm/test/Transforms/Inline/monster_scc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 38cf2fe30090d..3a8998f0ff8db 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -99,6 +99,10 @@ static cl::opt<std::string> CGSCCInlineReplayFile(
"by inlining from cgscc inline remarks."),
cl::Hidden);
+static cl::opt<bool> InlineEnablePriorityOrder(
+ "inline-enable-priority-order", cl::Hidden, cl::init(false),
+ cl::desc("Enable the priority inline order for the inliner"));
+
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
@@ -673,6 +677,7 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
template <typename T> class InlineOrder {
public:
using reference = T &;
+ using const_reference = const T &;
virtual ~InlineOrder() {}
@@ -680,9 +685,9 @@ template <typename T> class InlineOrder {
virtual void push(const T &Elt) = 0;
- virtual void pop() = 0;
+ virtual T pop() = 0;
- virtual reference front() = 0;
+ virtual const_reference front() = 0;
virtual void erase_if(function_ref<bool(T)> Pred) = 0;
@@ -692,18 +697,19 @@ template <typename T> class InlineOrder {
template <typename T, typename Container = SmallVector<T, 16>>
class DefaultInlineOrder : public InlineOrder<T> {
using reference = T &;
+ using const_reference = const T &;
public:
size_t size() override { return Calls.size() - FirstIndex; }
void push(const T &Elt) override { Calls.push_back(Elt); }
- void pop() override {
+ T pop() override {
assert(size() > 0);
- FirstIndex++;
+ return Calls[FirstIndex++];
}
- reference front() override {
+ const_reference front() override {
assert(size() > 0);
return Calls[FirstIndex];
}
@@ -718,6 +724,57 @@ class DefaultInlineOrder : public InlineOrder<T> {
size_t FirstIndex = 0;
};
+class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
+ using T = std::pair<CallBase *, int>;
+ using reference = T &;
+ using const_reference = const T &;
+
+ static bool cmp(const T &P1, const T &P2) { return P1.second > P2.second; }
+
+ int evaluate(CallBase *CB) {
+ Function *Callee = CB->getCalledFunction();
+ return (int)Callee->getInstructionCount();
+ }
+
+public:
+ size_t size() override { return Heap.size(); }
+
+ void push(const T &Elt) override {
+ CallBase *CB = Elt.first;
+ const int InlineHistoryID = Elt.second;
+ const int Goodness = evaluate(CB);
+
+ Heap.push_back({CB, Goodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ InlineHistoryMap[CB] = InlineHistoryID;
+ }
+
+ T pop() override {
+ assert(size() > 0);
+ CallBase *CB = Heap.front().first;
+ T Result = std::make_pair(CB, InlineHistoryMap[CB]);
+ InlineHistoryMap.erase(CB);
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ return Result;
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ CallBase *CB = Heap.front().first;
+ return *InlineHistoryMap.find(CB);
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
+ Heap.erase(std::remove_if(Heap.begin(), Heap.end(), Pred), Heap.end());
+ std::make_heap(Heap.begin(), Heap.end(), cmp);
+ }
+
+private:
+ SmallVector<T, 16> Heap;
+ DenseMap<CallBase *, int> InlineHistoryMap;
+};
+
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
@@ -740,7 +797,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We use a single common worklist for calls across the entire SCC. We
// process these in-order and append new calls introduced during inlining to
- // the end.
+ // the end. The PriorityInlineOrder is optional here, in which the smaller
+ // callee would have a higher priority to inline.
//
// Note that this particular order of processing is actually critical to
// avoid very bad behaviors. Consider *highly connected* call graphs where
@@ -762,7 +820,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// this model, but it is uniformly spread across all the functions in the SCC
// and eventually they all become too large to inline, rather than
// incrementally maknig a single function grow in a super linear fashion.
- DefaultInlineOrder<std::pair<CallBase *, int>> Calls;
+ std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
+ if (InlineEnablePriorityOrder)
+ Calls = std::make_unique<PriorityInlineOrder>();
+ else
+ Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
+ assert(Calls != nullptr && "Expected an initialized InlineOrder");
// Populate the initial list of calls in this SCC.
for (auto &N : InitialC) {
@@ -777,7 +840,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
- Calls.push({CB, -1});
+ Calls->push({CB, -1});
else if (!isa<IntrinsicInst>(I)) {
using namespace ore;
setInlineRemark(*CB, "unavailable definition");
@@ -791,7 +854,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
}
- if (Calls.empty())
+ if (Calls->empty())
return PreservedAnalyses::all();
// Capture updatable variable for the current SCC.
@@ -813,15 +876,15 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
SmallVector<Function *, 4> DeadFunctions;
// Loop forward over all of the calls.
- while (!Calls.empty()) {
+ while (!Calls->empty()) {
// We expect the calls to typically be batched with sequences of calls that
// have the same caller, so we first set up some shared infrastructure for
// this caller. We also do any pruning we can at this layer on the caller
// alone.
- Function &F = *Calls.front().first->getCaller();
+ Function &F = *Calls->front().first->getCaller();
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C) {
- Calls.pop();
+ Calls->pop();
continue;
}
@@ -837,9 +900,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// We bail out as soon as the caller has to change so we can update the
// call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls.empty() && Calls.front().first->getCaller() == &F) {
- auto &P = Calls.front();
- Calls.pop();
+ while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
+ auto P = Calls->pop();
CallBase *CB = P.first;
const int InlineHistoryID = P.second;
Function &Callee = *CB->getCalledFunction();
@@ -909,7 +971,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
if (NewCallee)
if (!NewCallee->isDeclaration())
- Calls.push({ICB, NewHistoryID});
+ Calls->push({ICB, NewHistoryID});
}
}
@@ -926,7 +988,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
- Calls.erase_if([&](const std::pair<CallBase *, int> &Call) {
+ Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
return Call.first->getCaller() == &Callee;
});
// Clear the body and queue the function itself for deletion when we
diff --git a/llvm/test/Transforms/Inline/inline_call.ll b/llvm/test/Transforms/Inline/inline_call.ll
index fb000f0c805f2..9a21607ec01c3 100644
--- a/llvm/test/Transforms/Inline/inline_call.ll
+++ b/llvm/test/Transforms/Inline/inline_call.ll
@@ -1,5 +1,6 @@
; Check the optimizer doesn't crash at inlining the function top and all of its callees are inlined.
; RUN: opt < %s -O3 -S | FileCheck %s
+; RUN: opt < %s -O3 -inline-enable-priority-order=true -S | FileCheck %s
define dso_local void (...)* @second(i8** %p) {
entry:
diff --git a/llvm/test/Transforms/Inline/inline_invoke.ll b/llvm/test/Transforms/Inline/inline_invoke.ll
index 8899f2732a4ce..f6f4357a9e927 100644
--- a/llvm/test/Transforms/Inline/inline_invoke.ll
+++ b/llvm/test/Transforms/Inline/inline_invoke.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -inline -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -inline-enable-priority-order=true -S | FileCheck %s
; Test that the inliner correctly handles inlining into invoke sites
; by appending selectors and forwarding _Unwind_Resume directly to the
diff --git a/llvm/test/Transforms/Inline/last-callsite.ll b/llvm/test/Transforms/Inline/last-callsite.ll
index 8ec53d0b6ffec..368a1c2b2a1f9 100644
--- a/llvm/test/Transforms/Inline/last-callsite.ll
+++ b/llvm/test/Transforms/Inline/last-callsite.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -inline-enable-priority-order=true -S | FileCheck %s
; The 'test1_' prefixed functions test the basic 'last callsite' inline
; threshold adjustment where we specifically inline the last call site of an
diff --git a/llvm/test/Transforms/Inline/monster_scc.ll b/llvm/test/Transforms/Inline/monster_scc.ll
index b4e45e06570cb..fab773f64b81c 100644
--- a/llvm/test/Transforms/Inline/monster_scc.ll
+++ b/llvm/test/Transforms/Inline/monster_scc.ll
@@ -41,7 +41,9 @@
;
; RUN: opt -S < %s -inline -inline-threshold=150 -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,OLD
; RUN: opt -S < %s -passes=inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
+; RUN: opt -S < %s -passes=inline -inline-threshold=150 -inline-enable-priority-order=true | FileCheck %s --check-prefixes=CHECK,PO
; RUN: opt -S < %s -passes=inliner-wrapper -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW
+; RUN: opt -S < %s -passes=inliner-wrapper -inline-threshold=150 -inline-enable-priority-order=true | FileCheck %s --check-prefixes=CHECK,PO
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -68,6 +70,21 @@ declare void @_Z1gi(i32)
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi2EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi2EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi3EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi0EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) {
entry:
%cmp = icmp eq i8* %B, %E
@@ -112,6 +129,13 @@ if.end3:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi2EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E) {
entry:
call void @_Z1gi(i32 0)
@@ -159,6 +183,19 @@ if.end3:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi3EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi2EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi3EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi0EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb0ELi1EEvPbS0_(i8* %B, i8* %E) {
entry:
%cmp = icmp eq i8* %B, %E
@@ -203,6 +240,21 @@ if.end3:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi3EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi2EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi3EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi0EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb1ELi1EEvPbS0_(i8* %B, i8* %E) {
entry:
call void @_Z1gi(i32 1)
@@ -251,6 +303,17 @@ if.end3:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi4EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi3EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi0EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb0ELi2EEvPbS0_(i8* %B, i8* %E) {
entry:
%cmp = icmp eq i8* %B, %E
@@ -301,6 +364,19 @@ if.end3:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi4EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi3EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi0EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb1ELi2EEvPbS0_(i8* %B, i8* %E) {
entry:
call void @_Z1gi(i32 2)
@@ -340,6 +416,15 @@ if.end3:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi0EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb0ELi3EEvPbS0_(i8* %B, i8* %E) {
entry:
%cmp = icmp eq i8* %B, %E
@@ -364,13 +449,35 @@ if.end3:
}
; CHECK-LABEL: define void @_Z1fILb1ELi3EEvPbS0_(
-; CHECK-NOT: call
-; CHECK: call void @_Z1gi(
-; CHECK-NOT: call
-; CHECK: call void @_Z1fILb1ELi0EEvPbS0_(
-; CHECK-NOT: call
-; CHECK: call void @_Z1fILb0ELi0EEvPbS0_(
-; CHECK-NOT: call
+; OLD-NOT: call
+; OLD: call void @_Z1gi(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb1ELi0EEvPbS0_(
+; OLD-NOT: call
+; OLD: call void @_Z1fILb0ELi0EEvPbS0_(
+; OLD-NOT: call
+; NEW-NOT: call
+; NEW: call void @_Z1gi(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb1ELi0EEvPbS0_(
+; NEW-NOT: call
+; NEW: call void @_Z1fILb0ELi0EEvPbS0_(
+; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi2EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi2EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb1ELi3EEvPbS0_(i8* %B, i8* %E) {
entry:
call void @_Z1gi(i32 3)
@@ -416,6 +523,13 @@ entry:
; NEW-NOT: call
; NEW: call void @_Z1fILb0ELi1EEvPbS0_(
; NEW-NOT: call
+; PO-NOT: call
+; PO: call void @_Z1gi(
+; PO-NOT: call
+; PO: call void @_Z1fILb1ELi1EEvPbS0_(
+; PO-NOT: call
+; PO: call void @_Z1fILb0ELi1EEvPbS0_(
+; PO-NOT: call
define void @_Z1fILb1ELi4EEvPbS0_(i8* %B, i8* %E) {
entry:
call void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E)
More information about the llvm-commits
mailing list