[llvm] Keep function which is directly called or has prologue (PR #88244)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 10 00:57:56 PDT 2024
https://github.com/yubingex007-a11y created https://github.com/llvm/llvm-project/pull/88244
None
>From a30b2a0fe477c945093d41c6f2681e3b749b10f3 Mon Sep 17 00:00:00 2001
From: Bing1 Yu <bing1.yu at intel.com>
Date: Thu, 4 Apr 2024 00:43:56 +0800
Subject: [PATCH 1/3] [bolt] keep only functions with a prologue
---
bolt/include/bolt/Rewrite/RewriteInstance.h | 2 ++
bolt/lib/Rewrite/RewriteInstance.cpp | 19 +++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 826677cd63b22b..677224624e462e 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -97,6 +97,8 @@ class RewriteInstance {
/// from meta data in the file.
void discoverFileObjects();
+ void keepPrologueFunction();
+
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 0c8ee0d417233b..8830911314d276 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -738,6 +738,7 @@ Error RewriteInstance::run() {
return E;
adjustCommandLineOptions();
discoverFileObjects();
+ keepPrologueFunction();
if (opts::Instrument && !BC->IsStaticExecutable)
if (Error E = discoverRtFiniAddress())
@@ -807,6 +808,24 @@ Error RewriteInstance::run() {
return Error::success();
}
+void RewriteInstance::keepPrologueFunction() {
+ BC->outs() << "keepPrologueFunction!"<<"\n";
+ for(auto &BFI : BC->getBinaryFunctions()) {
+ BinaryFunction &BF = BFI.second;
+ ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = BF.getData();
+ assert(ErrorOrFunctionData && "function data is not available");
+ ArrayRef<uint8_t> IData = *ErrorOrFunctionData;
+ BC->outs() << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
+ // Check for common function prologue patterns
+ if ((char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
+ BC->outs() << "Potential Function Entry Point: 0x" << Twine::utohexstr(BF.getAddress()) << "\n";
+ } else {
+ BF.setIgnored();
+ }
+
+ }
+}
+
void RewriteInstance::discoverFileObjects() {
NamedRegionTimer T("discoverFileObjects", "discover file objects",
TimerGroupName, TimerGroupDesc, opts::TimeRewrite);
>From f308f952e6abb0ac182be14acf1dba33949f6ab7 Mon Sep 17 00:00:00 2001
From: "Wang, Qing1" <qing1.wang at intel.com>
Date: Wed, 10 Apr 2024 14:04:31 +0800
Subject: [PATCH 2/3] take endbr64 into consideration
---
bolt/lib/Rewrite/RewriteInstance.cpp | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 8830911314d276..e4bed40db75839 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -815,13 +815,17 @@ void RewriteInstance::keepPrologueFunction() {
ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = BF.getData();
assert(ErrorOrFunctionData && "function data is not available");
ArrayRef<uint8_t> IData = *ErrorOrFunctionData;
- BC->outs() << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
- // Check for common function prologue patterns
- if ((char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
- BC->outs() << "Potential Function Entry Point: 0x" << Twine::utohexstr(BF.getAddress()) << "\n";
- } else {
- BF.setIgnored();
- }
+ // BC->outs() << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
+ // Check for common function prologue patterns
+ // push %rbp
+ // mov %rsp %rbp
+ if (BF.getSize()>=4 && (char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
+ LLVM_DEBUG(dbgs() << "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+ } else if (BF.getSize()>=4 && (char)IData[0] == '\xf3' && (char)IData[1] == '\x0f' && (char)IData[2] == '\x1e' && (char)IData[3] == '\xfa') {
+ LLVM_DEBUG(dbgs() << "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+ } else {
+ BF.setIgnored();
+ }
}
}
>From a846f4f3f4089c516a41db3b64d4cf750bd505ff Mon Sep 17 00:00:00 2001
From: "Wang, Qing1" <qing1.wang at intel.com>
Date: Wed, 10 Apr 2024 15:51:40 +0800
Subject: [PATCH 3/3] keep function which is directly called or has prologue
---
bolt/include/bolt/Core/BinaryFunction.h | 2 ++
bolt/include/bolt/Rewrite/RewriteInstance.h | 2 ++
bolt/lib/Core/BinaryFunction.cpp | 8 ++++-
bolt/lib/Rewrite/RewriteInstance.cpp | 36 +++++++++++++++++++--
4 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index bc047fefa3151c..cd6ded17e42f63 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -208,6 +208,8 @@ class BinaryFunction {
/// Mark injected functions
bool IsInjected = false;
+ bool IsDirectCalled =false;
+
using LSDATypeTableTy = SmallVector<uint64_t, 0>;
/// List of DWARF CFI instructions. Original CFI from the binary must be
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 677224624e462e..d5a3ebfb775173 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -99,6 +99,8 @@ class RewriteInstance {
void keepPrologueFunction();
+ void keepDCPFunction();
+
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 1fa96dfaabde81..2f7e6a8d3693bf 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1268,7 +1268,13 @@ Error BinaryFunction::disassemble() {
bool IsCall = MIB->isCall(Instruction);
const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
MCSymbol *TargetSymbol = nullptr;
-
+ if(IsCall) {
+ if (BinaryFunction *TargetFunc = BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+ // direct call here.
+ TargetFunc->IsDirectCalled = true;
+ LLVM_DEBUG(dbgs() << "[Disasm] Find function which is directly called: 0x" << Twine::utohexstr(TargetFunc->getAddress()) << "\n");
+ }
+ }
if (BC.MIB->isUnsupportedBranch(Instruction)) {
setIgnored();
if (BinaryFunction *TargetFunc =
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index e4bed40db75839..a0768ba870f8a5 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -738,7 +738,7 @@ Error RewriteInstance::run() {
return E;
adjustCommandLineOptions();
discoverFileObjects();
- keepPrologueFunction();
+ // keepPrologueFunction();
if (opts::Instrument && !BC->IsStaticExecutable)
if (Error E = discoverRtFiniAddress())
@@ -766,6 +766,8 @@ Error RewriteInstance::run() {
disassembleFunctions();
+ keepDCPFunction();
+
processMetadataPreCFG();
buildFunctionsCFG();
@@ -808,6 +810,34 @@ Error RewriteInstance::run() {
return Error::success();
}
+void RewriteInstance::keepDCPFunction() {
+ BC->outs() << "keepDCPFunction!"<<"\n";
+ for(auto &BFI : BC->getBinaryFunctions()) {
+ BinaryFunction &BF = BFI.second;
+ bool hasPrologue =false;
+ ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = BF.getData();
+ assert(ErrorOrFunctionData && "function data is not available");
+ ArrayRef<uint8_t> IData = *ErrorOrFunctionData;
+ // BC->outs() << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
+ // Check for common function prologue patterns
+ // push %rbp
+ // mov %rsp %rbp
+ if (BF.getSize()>=4 && (char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
+ LLVM_DEBUG(dbgs() << "[keepDCPFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+ hasPrologue =true;
+ } else if (BF.getSize()>=4 && (char)IData[0] == '\xf3' && (char)IData[1] == '\x0f' && (char)IData[2] == '\x1e' && (char)IData[3] == '\xfa') {
+ LLVM_DEBUG(dbgs() << "[keepDCPFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+ hasPrologue= true;
+ } else {
+ hasPrologue = false;
+ }
+ LLVM_DEBUG(
+ if(BF.IsDirectCalled) dbgs() << "[keepDCPFunction] Find function which is directly called: 0x" << Twine::utohexstr(BF.getAddress()) << "\n"
+ );
+ if(!BF.IsDirectCalled && !hasPrologue )
+ BF.setIgnored();
+ }
+}
void RewriteInstance::keepPrologueFunction() {
BC->outs() << "keepPrologueFunction!"<<"\n";
for(auto &BFI : BC->getBinaryFunctions()) {
@@ -820,9 +850,9 @@ void RewriteInstance::keepPrologueFunction() {
// push %rbp
// mov %rsp %rbp
if (BF.getSize()>=4 && (char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
- LLVM_DEBUG(dbgs() << "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+ LLVM_DEBUG(dbgs() << "[keepPrologueFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
} else if (BF.getSize()>=4 && (char)IData[0] == '\xf3' && (char)IData[1] == '\x0f' && (char)IData[2] == '\x1e' && (char)IData[3] == '\xfa') {
- LLVM_DEBUG(dbgs() << "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+ LLVM_DEBUG(dbgs() << "[keepPrologueFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
} else {
BF.setIgnored();
}
More information about the llvm-commits mailing list