[llvm] WIP: [Bolt] Add support for DT_INIT_ARRAY (PR #152491)
Ádám Kallai via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 06:03:23 PDT 2025
https://github.com/kaadam created https://github.com/llvm/llvm-project/pull/152491
Currently BOLT relies on the ELF 'e_entry' field or DT_INIT to determine the entry point of an ELF file for instrumentation. However, there are cases where an ELF file only has DT_INIT_ARRAY/DT_FINI_ARRAY sections and the ELF 'e_entry' field holds zero, i.e. when it is a shared object.
>From db2061e6f909926546e8c603afdb856dfd210f66 Mon Sep 17 00:00:00 2001
From: Adam Kallai <kadam at inf.u-szeged.hu>
Date: Tue, 6 May 2025 14:58:47 +0200
Subject: [PATCH] WIP: [Bolt] Add support for DT_INIT_ARRAY
Currently BOLT relies on the ELF 'e_entry' field or DT_INIT to determine
the entry point of an ELF file for instrumentation.
However, there are cases where an ELF file only has DT_INIT_ARRAY/DT_FINI_ARRAY
sections and the ELF 'e_entry' field holds zero, i.e. when it is a shared object.
---
bolt/include/bolt/Core/BinaryContext.h | 9 ++
bolt/include/bolt/Rewrite/RewriteInstance.h | 9 ++
bolt/lib/Rewrite/RewriteInstance.cpp | 93 +++++++++++++++++++--
3 files changed, 105 insertions(+), 6 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 91ecf89da618c..f3aa06ced79af 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -800,6 +800,15 @@ class BinaryContext {
/// the execution of the binary is completed.
std::optional<uint64_t> FiniFunctionAddress;
+ /// DT_INIT.
+ std::optional<uint64_t> InitAddress;
+
+ /// DT_INIT_ARRAY. Only used when DT_INIT is not set.
+ std::optional<uint64_t> InitArrayAddress;
+
+ /// DT_INIT_ARRAYSZ. Only used when DT_INIT is not set.
+ std::optional<uint64_t> InitArraySize;
+
/// DT_FINI.
std::optional<uint64_t> FiniAddress;
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 91d62a78de390..4c7920519a875 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -93,11 +93,20 @@ class RewriteInstance {
/// section allocations if found.
void discoverBOLTReserved();
+ /// Check whether we should use DT_INIT or DT_INIT_ARRAY for instrumentation.
+ /// DT_INIT is preferred; DT_INIT_ARRAY is only used when no DT_INIT entry was
+ /// found.
+ Error discoverRtInitAddress();
+
/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
Error discoverRtFiniAddress();
+ /// If DT_INIT_ARRAY is used for instrumentation, update the relocation of its
+ /// first entry to point to the instrumentation library's init address.
+ void updateRtInitReloc();
+
/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
/// first entry to point to the instrumentation library's fini address.
void updateRtFiniReloc();
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index fe4a23cc01382..7b1860d191c8d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -528,7 +528,7 @@ Error RewriteInstance::discoverStorage() {
auto ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
- BC->StartFunctionAddress = Obj.getHeader().e_entry;
+ BC->InitAddress = Obj.getHeader().e_entry;
NextAvailableAddress = 0;
uint64_t NextAvailableOffset = 0;
@@ -705,12 +705,15 @@ Error RewriteInstance::run() {
return E;
if (Error E = readSpecialSections())
return E;
- adjustCommandLineOptions();
discoverFileObjects();
- if (opts::Instrument && !BC->IsStaticExecutable)
+ if (opts::Instrument && !BC->IsStaticExecutable) {
+ if (Error E = discoverRtInitAddress())
+ return E;
if (Error E = discoverRtFiniAddress())
return E;
+ }
+ adjustCommandLineOptions();
preprocessProfileData();
@@ -752,8 +755,10 @@ Error RewriteInstance::run() {
updateMetadata();
- if (opts::Instrument && !BC->IsStaticExecutable)
+ if (opts::Instrument && !BC->IsStaticExecutable) {
+ updateRtInitReloc();
updateRtFiniReloc();
+ }
if (opts::OutputFilename == "/dev/null") {
BC->outs() << "BOLT-INFO: skipping writing final binary to disk\n";
@@ -1381,6 +1386,42 @@ void RewriteInstance::discoverBOLTReserved() {
NextAvailableAddress = BC->BOLTReserved.start();
}
+Error RewriteInstance::discoverRtInitAddress() { // Sets BC->StartFunctionAddress from InitAddress or from the first DT_INIT_ARRAY slot.
+ if (BC->InitAddress.has_value() && BC->InitAddress.value() != 0) { // A non-zero InitAddress takes priority over DT_INIT_ARRAY.
+ BC->StartFunctionAddress = BC->InitAddress;
+ return Error::success();
+ }
+ // Otherwise both DT_INIT_ARRAY and DT_INIT_ARRAYSZ must have been recorded.
+ if (!BC->InitArrayAddress || !BC->InitArraySize) {
+ return createStringError(
+ std::errc::not_supported,
+ "Instrumentation needs either DT_INIT or DT_INIT_ARRAY");
+ }
+ // The array must be big enough to hold at least one code pointer.
+ if (*BC->InitArraySize < BC->AsmInfo->getCodePointerSize()) {
+ return createStringError(std::errc::not_supported,
+ "Need at least 1 DT_INIT_ARRAY slot");
+ }
+ // Find the section that contains the DT_INIT_ARRAY address.
+ ErrorOr<BinarySection &> InitArraySection =
+ BC->getSectionForAddress(*BC->InitArrayAddress);
+ if (auto EC = InitArraySection.getError())
+ return errorCodeToError(EC);
+ // Prefer the dynamic relocation and use its addend. NOTE(review): offset 0 presumably assumes the array starts at the section start - confirm.
+ if (const Relocation *Reloc = InitArraySection->getDynamicRelocationAt(0)) {
+ BC->StartFunctionAddress = Reloc->Addend;
+ return Error::success();
+ }
+ // Fall back to the static relocation at the same offset and use its value.
+ if (const Relocation *Reloc = InitArraySection->getRelocationAt(0)) {
+ BC->StartFunctionAddress = Reloc->Value;
+ return Error::success();
+ }
+ // Neither kind of relocation was found for the first slot, so report an error.
+ return createStringError(std::errc::not_supported,
+ "No relocation for first DT_INIT_ARRAY slot");
+}
+
Error RewriteInstance::discoverRtFiniAddress() {
// Use DT_FINI if it's available.
if (BC->FiniAddress) {
@@ -1452,6 +1493,40 @@ void RewriteInstance::updateRtFiniReloc() {
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
}
+void RewriteInstance::updateRtInitReloc() { // Points the first DT_INIT_ARRAY slot's relocation(s) at the runtime library's start address.
+ // Updating DT_INIT is handled by patchELFDynamic.
+ if (BC->InitAddress.has_value() && BC->InitAddress.value() != 0)
+ return;
+ // Nothing to do without a runtime library that reports a non-zero start address.
+ const RuntimeLibrary *RT = BC->getRuntimeLibrary();
+ if (!RT || !RT->getRuntimeStartAddress())
+ return;
+ // With InitAddress unset or zero, the array address and size are expected to be known here.
+ assert(BC->InitArrayAddress && BC->InitArraySize &&
+ "inconsistent .init_array state");
+ // Look up the section that contains the DT_INIT_ARRAY address.
+ ErrorOr<BinarySection &> InitArraySection =
+ BC->getSectionForAddress(*BC->InitArrayAddress);
+ assert(InitArraySection && ".init_array removed");
+ // If a dynamic relocation exists at offset 0, take it, retarget its addend, and add it back.
+ if (std::optional<Relocation> Reloc =
+ InitArraySection->takeDynamicRelocationAt(0)) {
+ assert(Reloc->Addend == BC->StartFunctionAddress &&
+ "inconsistent .init_array dynamic relocation");
+ Reloc->Addend = RT->getRuntimeStartAddress();
+ InitArraySection->addDynamicRelocation(*Reloc);
+ }
+
+ // Update the static relocation by adding a pending relocation which will get
+ // patched when flushPendingRelocations is called in rewriteFile. Note that
+ // flushPendingRelocations will calculate the value to patch as
+ // "Symbol + Addend". Since we don't have a symbol, just set the addend to the
+ // desired value.
+ InitArraySection->addPendingRelocation(Relocation{
+ /*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
+ /*Addend*/ RT->getRuntimeStartAddress(), /*Value*/ 0});
+}
+
void RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions ||
opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
@@ -5705,10 +5780,16 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
switch (Dyn.d_tag) {
case ELF::DT_INIT:
if (!BC->HasInterpHeader) {
- LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set start function address\n");
- BC->StartFunctionAddress = Dyn.getPtr();
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Set entry point address\n");
+ BC->InitAddress = Dyn.getPtr();
}
break;
+ case ELF::DT_INIT_ARRAY:
+ BC->InitArrayAddress = Dyn.getPtr();
+ break;
+ case ELF::DT_INIT_ARRAYSZ:
+ BC->InitArraySize = Dyn.getPtr();
+ break;
case ELF::DT_FINI:
BC->FiniAddress = Dyn.getPtr();
break;
More information about the llvm-commits
mailing list