[llvm] WIP: [Bolt] Add support for DT_INIT_ARRAY (PR #152491)

Ádám Kallai via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 7 06:03:23 PDT 2025


https://github.com/kaadam created https://github.com/llvm/llvm-project/pull/152491

Currently, BOLT relies on the ELF 'e_entry' field or DT_INIT to determine the entry point of an ELF file for instrumentation. However, there are cases where an ELF file has only DT_INIT_ARRAY/DT_FINI_ARRAY sections and its 'e_entry' field is zero, i.e. when it is a shared object.

From db2061e6f909926546e8c603afdb856dfd210f66 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 6 May 2025 14:58:47 +0200
Subject: [PATCH] WIP: [Bolt] Add support for DT_INIT_ARRAY

Currently, BOLT relies on the ELF 'e_entry' field or DT_INIT to determine
the entry point of an ELF file for instrumentation.
However, there are cases where an ELF file has only DT_INIT_ARRAY/DT_FINI_ARRAY
sections and its 'e_entry' field is zero, i.e. when it is a shared object.
---
 bolt/include/bolt/Core/BinaryContext.h      |  9 ++
 bolt/include/bolt/Rewrite/RewriteInstance.h |  9 ++
 bolt/lib/Rewrite/RewriteInstance.cpp        | 93 +++++++++++++++++++--
 3 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 91ecf89da618c..f3aa06ced79af 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -800,6 +800,15 @@ class BinaryContext {
   /// the execution of the binary is completed.
   std::optional<uint64_t> FiniFunctionAddress;
 
+  /// DT_INIT.
+  std::optional<uint64_t> InitAddress;
+
+  /// DT_INIT_ARRAY. Only used when DT_INIT is not set.
+  std::optional<uint64_t> InitArrayAddress;
+
+  /// DT_INIT_ARRAYSZ. Only used when DT_INIT is not set.
+  std::optional<uint64_t> InitArraySize;
+
   /// DT_FINI.
   std::optional<uint64_t> FiniAddress;
 
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 91d62a78de390..4c7920519a875 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -93,11 +93,20 @@ class RewriteInstance {
   /// section allocations if found.
   void discoverBOLTReserved();
 
+  /// Check whether we should use DT_INIT or DT_INIT_ARRAY for instrumentation.
+  /// DT_INIT is preferred; DT_INIT_ARRAY is only used when no DT_INIT entry was
+  /// found.
+  Error discoverRtInitAddress();
+
   /// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
   /// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
   /// found.
   Error discoverRtFiniAddress();
 
+  /// If DT_INIT_ARRAY is used for instrumentation, update the relocation of its
+  /// first entry to point to the instrumentation library's init address.
+  void updateRtInitReloc();
+
   /// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
   /// first entry to point to the instrumentation library's fini address.
   void updateRtFiniReloc();
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index fe4a23cc01382..7b1860d191c8d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -528,7 +528,7 @@ Error RewriteInstance::discoverStorage() {
   auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
   const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
 
-  BC->StartFunctionAddress = Obj.getHeader().e_entry;
+  BC->InitAddress = Obj.getHeader().e_entry;
 
   NextAvailableAddress = 0;
   uint64_t NextAvailableOffset = 0;
@@ -705,12 +705,15 @@ Error RewriteInstance::run() {
     return E;
   if (Error E = readSpecialSections())
     return E;
-  adjustCommandLineOptions();
   discoverFileObjects();
 
-  if (opts::Instrument && !BC->IsStaticExecutable)
+  if (opts::Instrument && !BC->IsStaticExecutable) {
+    if (Error E = discoverRtInitAddress())
+      return E;
     if (Error E = discoverRtFiniAddress())
       return E;
+  }
+  adjustCommandLineOptions();
 
   preprocessProfileData();
 
@@ -752,8 +755,10 @@ Error RewriteInstance::run() {
 
   updateMetadata();
 
-  if (opts::Instrument && !BC->IsStaticExecutable)
+  if (opts::Instrument && !BC->IsStaticExecutable) {
+    updateRtInitReloc();
     updateRtFiniReloc();
+  }
 
   if (opts::OutputFilename == "/dev/null") {
     BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
@@ -1381,6 +1386,42 @@ void RewriteInstance::discoverBOLTReserved() {
   NextAvailableAddress = BC->BOLTReserved.start();
 }
 
+Error RewriteInstance::discoverRtInitAddress() {
+  if (BC->InitAddress.has_value() && BC->InitAddress.value() != 0) {
+    BC->StartFunctionAddress = BC->InitAddress;
+    return Error::success();
+  }
+
+  if (!BC->InitArrayAddress || !BC->InitArraySize) {
+    return createStringError(
+        std::errc::not_supported,
+        "Instrumentation needs either DT_INIT or DT_INIT_ARRAY");
+  }
+
+  if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) {
+    return createStringError(std::errc::not_supported,
+                             "Need at least 1 DT_INIT_ARRAY slot");
+  }
+
+  ErrorOr<BinarySection &> InitArraySection =
+      BC->getSectionForAddress(*BC->InitArrayAddress);
+  if (auto EC = InitArraySection.getError())
+    return errorCodeToError(EC);
+
+  if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) {
+    BC->StartFunctionAddress = Reloc->Addend;
+    return Error::success();
+  }
+
+  if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) {
+    BC->StartFunctionAddress = Reloc->Value;
+    return Error::success();
+  }
+
+  return createStringError(std::errc::not_supported,
+                           "No relocation for first DT_INIT_ARRAY slot");
+}
+
 Error RewriteInstance::discoverRtFiniAddress() {
   // Use DT_FINI if it's available.
   if (BC->FiniAddress) {
@@ -1452,6 +1493,40 @@ void RewriteInstance::updateRtFiniReloc() {
       /*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
 }
 
+void RewriteInstance::updateRtInitReloc() {
+  // Updating DT_INIT is handled by patchELFDynamic.
+  if (BC->InitAddress.has_value() && BC->InitAddress.value() != 0)
+    return;
+
+  const RuntimeLibrary *RT = BC->getRuntimeLibrary();
+  if (!RT || !RT->getRuntimeStartAddress())
+    return;
+
+  assert(BC->InitArrayAddress && BC->InitArraySize &&
+         "inconsistent .init_array state");
+
+  ErrorOr<BinarySection &> InitArraySection =
+      BC->getSectionForAddress(*BC->InitArrayAddress);
+  assert(InitArraySection && ".init_array removed");
+
+  if (std::optional<Relocation> Reloc =
+          InitArraySection->takeDynamicRelocationAt(0)) {
+    assert(Reloc->Addend == BC->StartFunctionAddress &&
+           "inconsistent .init_array dynamic relocation");
+    Reloc->Addend = RT->getRuntimeStartAddress();
+    InitArraySection->addDynamicRelocation(*Reloc);
+  }
+
+  // Update the static relocation by adding a pending relocation which will get
+  // patched when flushPendingRelocations is called in rewriteFile. Note that
+  // flushPendingRelocations will calculate the value to patch as
+  // "Symbol + Addend". Since we don't have a symbol, just set the addend to the
+  // desired value.
+  InitArraySection->addPendingRelocation(Relocation{
+      /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
+      /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
+}
+
 void RewriteInstance::registerFragments() {
   if (!BC->HasSplitFunctions ||
       opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
@@ -5705,10 +5780,16 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
     switch (Dyn.d_tag) {
     case ELF::DT_INIT:
       if (!BC->HasInterpHeader) {
-        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
-        BC->StartFunctionAddress = Dyn.getPtr();
+        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set entry point address\n");
+        BC->InitAddress = Dyn.getPtr();
       }
       break;
+    case ELF::DT_INIT_ARRAY:
+      BC->InitArrayAddress = Dyn.getPtr();
+      break;
+    case ELF::DT_INIT_ARRAYSZ:
+      BC->InitArraySize = Dyn.getPtr();
+      break;
     case ELF::DT_FINI:
       BC->FiniAddress = Dyn.getPtr();
       break;



More information about the llvm-commits mailing list