[llvm-branch-commits] [BOLT] ICF-aware Indirect Call Promotion (PR #120493)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 18 15:28:25 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Indirect Call Promotion used to consider each symbol in the call profile
individually. However, with ICF enabled, several symbols might get folded
into one function. Indirect Call Promotion should accumulate their counts
when checking against the threshold and generate checks accordingly, one
per unique function.
Test Plan: added bolt/test/X86/icf-aware-icp.s
---
Full diff: https://github.com/llvm/llvm-project/pull/120493.diff
3 Files Affected:
- (modified) bolt/include/bolt/Passes/IndirectCallPromotion.h (+11)
- (modified) bolt/lib/Passes/IndirectCallPromotion.cpp (+16-2)
- (added) bolt/test/X86/icf-aware-icp.s (+56)
``````````diff
diff --git a/bolt/include/bolt/Passes/IndirectCallPromotion.h b/bolt/include/bolt/Passes/IndirectCallPromotion.h
index 8ec160b867cf8ce..6f5f3532fc82bc5 100644
--- a/bolt/include/bolt/Passes/IndirectCallPromotion.h
+++ b/bolt/include/bolt/Passes/IndirectCallPromotion.h
@@ -108,6 +108,9 @@ class IndirectCallPromotion : public BinaryFunctionPass {
Location() {}
explicit Location(MCSymbol *Sym) : Sym(Sym) {}
explicit Location(uint64_t Addr) : Addr(Addr) {}
+ bool operator==(const Location &O) const {
+ return Sym == O.Sym && Addr == O.Addr; // Equal only if both fields match.
+ }
};
struct Callsite {
@@ -123,6 +126,14 @@ class IndirectCallPromotion : public BinaryFunctionPass {
uint64_t Branches, uint64_t JTIndex)
: From(From), To(To), Mispreds(Mispreds), Branches(Branches),
JTIndices(1, JTIndex) {}
+ // Accumulate the Mispreds and Branches counts of another Callsite with the
+ // same From location into this one (used for merging targets); To is kept.
+ Callsite &operator+=(const Callsite &O) {
+ assert(From == O.From && "merged callsites must share the From location");
+ Mispreds += O.Mispreds;
+ Branches += O.Branches;
+ return *this;
+ }
};
std::unordered_set<const BinaryFunction *> Modified;
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 2b5a591f4c7a22f..aad11adb9697294 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -317,11 +317,25 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
Inst, "CallProfile");
if (ICSP) {
+ // Deduplicate aliases by using function + entry id as a key type.
+ using FuncEntryTy = std::pair<const BinaryFunction *, uint64_t>;
+ std::map<FuncEntryTy, Callsite> FuncToCallsite;
for (const IndirectCallProfile &CSP : ICSP.get()) {
Callsite Site(BF, CSP);
- if (Site.isValid())
- Targets.emplace_back(std::move(Site));
+ if (!Site.isValid())
+ continue;
+
+ uint64_t EntryDesc = 0;
+ const BinaryFunction *Func =
+ BC.getFunctionForSymbol(CSP.Symbol, &EntryDesc);
+
+ auto [It, Success] =
+ FuncToCallsite.try_emplace(std::make_pair(Func, EntryDesc), Site);
+ if (!Success)
+ It->second += Site;
}
+ for (Callsite Site : llvm::make_second_range(FuncToCallsite))
+ Targets.emplace_back(std::move(Site));
}
}
diff --git a/bolt/test/X86/icf-aware-icp.s b/bolt/test/X86/icf-aware-icp.s
new file mode 100644
index 000000000000000..ca4dba58806c3cb
--- /dev/null
+++ b/bolt/test/X86/icf-aware-icp.s
@@ -0,0 +1,56 @@
+## Check that ICP recognizes functions folded by ICF and inserts a single check
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: link_fdata %s %t.o %t.fdata
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: ld.lld -q -o %t %t.o
+
+# Without ICF, ICP should not be performed:
+# RUN: llvm-bolt %t -o %t.bolt1 --icp=calls --icp-calls-topn=1 --print-icp \
+# RUN: --icp-calls-total-percent-threshold=90 \
+# RUN: --data %t.fdata | FileCheck %s --check-prefix=CHECK-NO-ICF
+
+# CHECK-NO-ICF: ICP percentage of indirect callsites that are optimized = 0.0%
+
+# With ICF, ICP should be performed:
+# RUN: llvm-bolt %t -o %t.bolt1 --icp=calls --icp-calls-topn=1 --print-icp \
+# RUN: --icp-calls-total-percent-threshold=90 \
+# RUN: --data %t.fdata --icf | FileCheck %s --check-prefix=CHECK-ICF
+
+# CHECK-ICF: ICP percentage of indirect callsites that are optimized = 100.0%
+# CHECK-ICF: Binary Function "main" after indirect-call-promotion
+# CHECK-ICF: callq bar
+
+ .globl bar
+bar:
+ imull $0x64, %edi, %eax
+ addl $0x2a, %eax
+ retq
+.size bar, .-bar
+
+ .globl foo
+foo:
+ imull $0x64, %edi, %eax
+ addl $0x2a, %eax
+ retq
+.size foo, .-foo
+
+ .globl main
+main:
+ pushq %rax
+ movslq %edi, %rax
+ leaq funcs(%rip), %rcx
+ xorl %edi, %edi
+LBB00_br:
+ callq *(%rcx,%rax,8)
+# FDATA: 1 main #LBB00_br# 1 foo 0 0 2
+# FDATA: 1 main #LBB00_br# 1 bar 0 0 2
+ popq %rcx
+ retq
+.size main, .-main
+
+ .section .rodata
+ .globl funcs
+funcs:
+ .quad foo
+ .quad bar
``````````
</details>
https://github.com/llvm/llvm-project/pull/120493
More information about the llvm-branch-commits
mailing list