[llvm] Keep function which is directly called or has prologue (PR #88244)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 10 00:57:56 PDT 2024


https://github.com/yubingex007-a11y created https://github.com/llvm/llvm-project/pull/88244

None (no description was provided for this pull request).

>From a30b2a0fe477c945093d41c6f2681e3b749b10f3 Mon Sep 17 00:00:00 2001
From: Bing1 Yu <bing1.yu at intel.com>
Date: Thu, 4 Apr 2024 00:43:56 +0800
Subject: [PATCH 1/3] [bolt] keep only function with prologue

---
 bolt/include/bolt/Rewrite/RewriteInstance.h |  2 ++
 bolt/lib/Rewrite/RewriteInstance.cpp        | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 826677cd63b22b..677224624e462e 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -97,6 +97,8 @@ class RewriteInstance {
   /// from meta data in the file.
   void discoverFileObjects();
 
+  void keepPrologueFunction();
+
   /// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
   /// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
   /// found.
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 0c8ee0d417233b..8830911314d276 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -738,6 +738,7 @@ Error RewriteInstance::run() {
     return E;
   adjustCommandLineOptions();
   discoverFileObjects();
+  keepPrologueFunction();
 
   if (opts::Instrument && !BC->IsStaticExecutable)
     if (Error E = discoverRtFiniAddress())
@@ -807,6 +808,24 @@ Error RewriteInstance::run() {
   return Error::success();
 }
 
+void RewriteInstance::keepPrologueFunction() {
+ BC->outs() << "keepPrologueFunction!"<<"\n";
+  for(auto &BFI : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = BFI.second;
+    ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = BF.getData();
+    assert(ErrorOrFunctionData && "function data is not available");
+    ArrayRef<uint8_t> IData = *ErrorOrFunctionData;
+      BC->outs()  << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
+      // Check for common function prologue patterns
+      if ((char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
+          BC->outs()  << "Potential Function Entry Point: 0x" << Twine::utohexstr(BF.getAddress()) << "\n";
+      } else {
+        BF.setIgnored();
+      }
+
+  }
+}
+
 void RewriteInstance::discoverFileObjects() {
   NamedRegionTimer T("discoverFileObjects", "discover file objects",
                      TimerGroupName, TimerGroupDesc, opts::TimeRewrite);

>From f308f952e6abb0ac182be14acf1dba33949f6ab7 Mon Sep 17 00:00:00 2001
From: "Wang, Qing1" <qing1.wang at intel.com>
Date: Wed, 10 Apr 2024 14:04:31 +0800
Subject: [PATCH 2/3] take endbr64 into consideration

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 8830911314d276..e4bed40db75839 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -815,13 +815,17 @@ void RewriteInstance::keepPrologueFunction() {
     ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = BF.getData();
     assert(ErrorOrFunctionData && "function data is not available");
     ArrayRef<uint8_t> IData = *ErrorOrFunctionData;
-      BC->outs()  << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
-      // Check for common function prologue patterns
-      if ((char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
-          BC->outs()  << "Potential Function Entry Point: 0x" << Twine::utohexstr(BF.getAddress()) << "\n";
-      } else {
-        BF.setIgnored();
-      }
+    // BC->outs()  << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
+    // Check for common function prologue patterns
+    // push %rbp
+    // mov %rsp %rbp
+    if (BF.getSize()>=4 && (char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
+        LLVM_DEBUG(dbgs() << "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+    } else if (BF.getSize()>=4 && (char)IData[0] == '\xf3' && (char)IData[1] == '\x0f' && (char)IData[2] == '\x1e' && (char)IData[3] == '\xfa') {
+        LLVM_DEBUG(dbgs() <<  "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+    } else {
+      BF.setIgnored();
+    }
 
   }
 }

>From a846f4f3f4089c516a41db3b64d4cf750bd505ff Mon Sep 17 00:00:00 2001
From: "Wang, Qing1" <qing1.wang at intel.com>
Date: Wed, 10 Apr 2024 15:51:40 +0800
Subject: [PATCH 3/3] keep function which is directly called or has prologue

---
 bolt/include/bolt/Core/BinaryFunction.h     |  2 ++
 bolt/include/bolt/Rewrite/RewriteInstance.h |  2 ++
 bolt/lib/Core/BinaryFunction.cpp            |  8 ++++-
 bolt/lib/Rewrite/RewriteInstance.cpp        | 36 +++++++++++++++++++--
 4 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index bc047fefa3151c..cd6ded17e42f63 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -208,6 +208,8 @@ class BinaryFunction {
   /// Mark injected functions
   bool IsInjected = false;
 
+  bool IsDirectCalled =false;
+
   using LSDATypeTableTy = SmallVector<uint64_t, 0>;
 
   /// List of DWARF CFI instructions. Original CFI from the binary must be
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 677224624e462e..d5a3ebfb775173 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -99,6 +99,8 @@ class RewriteInstance {
 
   void keepPrologueFunction();
 
+  void keepDCPFunction();
+
   /// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
   /// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
   /// found.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 1fa96dfaabde81..2f7e6a8d3693bf 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1268,7 +1268,13 @@ Error BinaryFunction::disassemble() {
         bool IsCall = MIB->isCall(Instruction);
         const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
         MCSymbol *TargetSymbol = nullptr;
-
+        if(IsCall) {
+          if (BinaryFunction *TargetFunc = BC.getBinaryFunctionContainingAddress(TargetAddress)) {
+            // direct call here.
+            TargetFunc->IsDirectCalled = true;
+            LLVM_DEBUG(dbgs() << "[Disasm] Find function which is directly called: 0x" << Twine::utohexstr(TargetFunc->getAddress()) << "\n");
+          }
+        }
         if (BC.MIB->isUnsupportedBranch(Instruction)) {
           setIgnored();
           if (BinaryFunction *TargetFunc =
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index e4bed40db75839..a0768ba870f8a5 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -738,7 +738,7 @@ Error RewriteInstance::run() {
     return E;
   adjustCommandLineOptions();
   discoverFileObjects();
-  keepPrologueFunction();
+  // keepPrologueFunction();
 
   if (opts::Instrument && !BC->IsStaticExecutable)
     if (Error E = discoverRtFiniAddress())
@@ -766,6 +766,8 @@ Error RewriteInstance::run() {
 
   disassembleFunctions();
 
+  keepDCPFunction();
+
   processMetadataPreCFG();
 
   buildFunctionsCFG();
@@ -808,6 +810,34 @@ Error RewriteInstance::run() {
   return Error::success();
 }
 
+void RewriteInstance::keepDCPFunction() {
+ BC->outs() << "keepDCPFunction!"<<"\n";
+  for(auto &BFI : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = BFI.second;
+    bool hasPrologue =false;
+    ErrorOr<ArrayRef<uint8_t>> ErrorOrFunctionData = BF.getData();
+    assert(ErrorOrFunctionData && "function data is not available");
+    ArrayRef<uint8_t> IData = *ErrorOrFunctionData;
+    // BC->outs()  << "Potential Function Entry Point: 0x" << Twine::utohexstr(IData[0]) << "\n";
+    // Check for common function prologue patterns
+    // push %rbp
+    // mov %rsp %rbp
+    if (BF.getSize()>=4 && (char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
+        LLVM_DEBUG(dbgs() << "[keepDCPFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+        hasPrologue =true;
+    } else if (BF.getSize()>=4 && (char)IData[0] == '\xf3' && (char)IData[1] == '\x0f' && (char)IData[2] == '\x1e' && (char)IData[3] == '\xfa') {
+       LLVM_DEBUG(dbgs() <<  "[keepDCPFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+        hasPrologue= true;
+    } else {
+      hasPrologue = false;
+    }
+   LLVM_DEBUG(
+    if(BF.IsDirectCalled) dbgs() << "[keepDCPFunction] Find function which is directly called: 0x" << Twine::utohexstr(BF.getAddress()) << "\n"
+    );
+    if(!BF.IsDirectCalled && !hasPrologue )
+      BF.setIgnored();
+  }
+}
 void RewriteInstance::keepPrologueFunction() {
  BC->outs() << "keepPrologueFunction!"<<"\n";
   for(auto &BFI : BC->getBinaryFunctions()) {
@@ -820,9 +850,9 @@ void RewriteInstance::keepPrologueFunction() {
     // push %rbp
     // mov %rsp %rbp
     if (BF.getSize()>=4 && (char)IData[0] == '\x55' && (char)IData[1] == '\x48' && (char)IData[2] == '\x89' && (char)IData[3] == '\xe5') {
-        LLVM_DEBUG(dbgs() << "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+        LLVM_DEBUG(dbgs() << "[keepPrologueFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
     } else if (BF.getSize()>=4 && (char)IData[0] == '\xf3' && (char)IData[1] == '\x0f' && (char)IData[2] == '\x1e' && (char)IData[3] == '\xfa') {
-        LLVM_DEBUG(dbgs() <<  "Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
+        LLVM_DEBUG(dbgs() <<  "[keepPrologueFunction] Find function with prologue in: 0x" << Twine::utohexstr(BF.getAddress()) << "\n");
     } else {
       BF.setIgnored();
     }



More information about the llvm-commits mailing list