[llvm] [BOLT] Use rewriter interface for updating binary build ID (PR #94273)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 12:29:50 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-bolt
Author: Maksim Panchenko (maksfb)
<details>
<summary>Changes</summary>
Move functionality for patching build ID into a separate rewriter class and change the way we do the patching. Support build ID in different note sections in order to update the build ID in the Linux kernel binary, which puts it into the ".notes" section instead of ".note.gnu.build-id".
---
Full diff: https://github.com/llvm/llvm-project/pull/94273.diff
9 Files Affected:
- (modified) bolt/include/bolt/Core/BinarySection.h (+1)
- (modified) bolt/include/bolt/Rewrite/MetadataManager.h (+3)
- (modified) bolt/include/bolt/Rewrite/MetadataRewriter.h (+4)
- (modified) bolt/include/bolt/Rewrite/MetadataRewriters.h (+2)
- (modified) bolt/include/bolt/Rewrite/RewriteInstance.h (+3-20)
- (added) bolt/lib/Rewrite/BuildIDRewriter.cpp (+113)
- (modified) bolt/lib/Rewrite/CMakeLists.txt (+1)
- (modified) bolt/lib/Rewrite/MetadataManager.cpp (+12)
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+8-84)
``````````diff
diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h
index 5b7a5b08820e6..d362961176b32 100644
--- a/bolt/include/bolt/Core/BinarySection.h
+++ b/bolt/include/bolt/Core/BinarySection.h
@@ -284,6 +284,7 @@ class BinarySection {
return true;
}
}
+ bool isNote() const { return isELF() && ELFType == ELF::SHT_NOTE; }
bool isReordered() const { return IsReordered; }
bool isAnonymous() const { return IsAnonymous; }
bool isRelro() const { return IsRelro; }
diff --git a/bolt/include/bolt/Rewrite/MetadataManager.h b/bolt/include/bolt/Rewrite/MetadataManager.h
index 2ff70dbaab3de..6001b70f625e2 100644
--- a/bolt/include/bolt/Rewrite/MetadataManager.h
+++ b/bolt/include/bolt/Rewrite/MetadataManager.h
@@ -28,6 +28,9 @@ class MetadataManager {
/// Register a new \p Rewriter.
void registerRewriter(std::unique_ptr<MetadataRewriter> Rewriter);
+ /// Run initializers after sections are discovered.
+ void runSectionInitializers();
+
/// Execute initialization of rewriters while functions are disassembled, but
/// CFG is not yet built.
void runInitializersPreCFG();
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriter.h b/bolt/include/bolt/Rewrite/MetadataRewriter.h
index 1e7e0381c1e98..6ff8f0af7a8e6 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriter.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriter.h
@@ -45,6 +45,10 @@ class MetadataRewriter {
/// Return name for the rewriter.
StringRef getName() const { return Name; }
+ /// Run initialization after the binary is read and sections are identified,
+ /// but before functions are discovered.
+ virtual Error sectionInitializer() { return Error::success(); }
+
/// Interface for modifying/annotating functions in the binary based on the
/// contents of the section. Functions are in pre-cfg state.
virtual Error preCFGInitializer() { return Error::success(); }
diff --git a/bolt/include/bolt/Rewrite/MetadataRewriters.h b/bolt/include/bolt/Rewrite/MetadataRewriters.h
index 8523231886503..b71bd6cad2505 100644
--- a/bolt/include/bolt/Rewrite/MetadataRewriters.h
+++ b/bolt/include/bolt/Rewrite/MetadataRewriters.h
@@ -21,6 +21,8 @@ class BinaryContext;
std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);
+std::unique_ptr<MetadataRewriter> createBuildIDRewriter(BinaryContext &);
+
std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);
std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index a55516d553979..af1d9b4b70a3d 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -79,15 +79,6 @@ class RewriteInstance {
return InputFile->getFileName();
}
- /// Set the build-id string if we did not fail to parse the contents of the
- /// ELF note section containing build-id information.
- void parseBuildID();
-
- /// The build-id is typically a stream of 20 bytes. Return these bytes in
- /// printable hexadecimal form if they are available, or std::nullopt
- /// otherwise.
- std::optional<std::string> getPrintableBuildID() const;
-
/// If this instance uses a profile, return appropriate profile reader.
const ProfileReaderBase *getProfileReader() const {
return ProfileReader.get();
@@ -184,6 +175,9 @@ class RewriteInstance {
/// Link additional runtime code to support instrumentation.
void linkRuntime();
+ /// Process metadata in sections before functions are discovered.
+ void processSectionMetadata();
+
/// Process metadata in special sections before CFG is built for functions.
void processMetadataPreCFG();
@@ -368,11 +362,6 @@ class RewriteInstance {
/// Loop over now emitted functions to write translation maps
void encodeBATSection();
- /// Update the ELF note section containing the binary build-id to reflect
- /// a new build-id, so tools can differentiate between the old and the
- /// rewritten binary.
- void patchBuildID();
-
/// Return file offset corresponding to a virtual \p Address.
/// Return 0 if the address has no mapping in the file, including being
/// part of .bss section.
@@ -562,18 +551,12 @@ class RewriteInstance {
/// Exception handling and stack unwinding information in this binary.
ErrorOr<BinarySection &> EHFrameSection{std::errc::bad_address};
- /// .note.gnu.build-id section.
- ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};
-
/// Helper for accessing sections by name.
BinarySection *getSection(const Twine &Name) {
ErrorOr<BinarySection &> ErrOrSection = BC->getUniqueSectionByName(Name);
return ErrOrSection ? &ErrOrSection.get() : nullptr;
}
- /// A reference to the build-id bytes in the original binary
- StringRef BuildID;
-
/// Keep track of functions we fail to write in the binary. We need to avoid
/// rewriting CFI info for these functions.
std::vector<uint64_t> FailedAddresses;
diff --git a/bolt/lib/Rewrite/BuildIDRewriter.cpp b/bolt/lib/Rewrite/BuildIDRewriter.cpp
new file mode 100644
index 0000000000000..83d0c9bfe182a
--- /dev/null
+++ b/bolt/lib/Rewrite/BuildIDRewriter.cpp
@@ -0,0 +1,113 @@
+//===- bolt/Rewrite/BuildIDRewriter.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Read and update build ID stored in ELF note section.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Rewrite/MetadataRewriter.h"
+#include "bolt/Rewrite/MetadataRewriters.h"
+#include "llvm/Support/Errc.h"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace {
+
+/// The build-id is typically a stream of 20 bytes. Return these bytes in
+/// printable hexadecimal form.
+std::string getPrintableBuildID(StringRef BuildID) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ for (const char &Char : BuildID)
+ OS << format("%.2x", static_cast<unsigned char>(Char));
+
+ return OS.str();
+}
+
+class BuildIDRewriter final : public MetadataRewriter {
+
+ /// Information about binary build ID.
+ ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};
+ StringRef BuildID;
+ std::optional<uint64_t> BuildIDOffset;
+ std::optional<uint64_t> BuildIDSize;
+
+public:
+ BuildIDRewriter(StringRef Name, BinaryContext &BC)
+ : MetadataRewriter(Name, BC) {}
+
+ Error sectionInitializer() override;
+
+ Error postEmitFinalizer() override;
+};
+
+Error BuildIDRewriter::sectionInitializer() {
+ // Typically, build ID will reside in .note.gnu.build-id section. Howerver,
+ // a linker script can change the section name and such is the case with
+ // the Linux kernel. Hence, we iterate over all note sections.
+ for (BinarySection &NoteSection : BC.sections()) {
+ if (!NoteSection.isNote())
+ continue;
+
+ StringRef Buf = NoteSection.getContents();
+ DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
+ BC.AsmInfo->getCodePointerSize());
+ DataExtractor::Cursor Cursor(0);
+ while (Cursor && !DE.eof(Cursor)) {
+ const uint32_t NameSz = DE.getU32(Cursor);
+ const uint32_t DescSz = DE.getU32(Cursor);
+ const uint32_t Type = DE.getU32(Cursor);
+
+ StringRef Name =
+ NameSz ? Buf.slice(Cursor.tell(), Cursor.tell() + NameSz) : "<empty>";
+ Cursor.seek(alignTo(Cursor.tell() + NameSz, 4));
+
+ const uint64_t DescOffset = Cursor.tell();
+ StringRef Desc =
+ DescSz ? Buf.slice(DescOffset, DescOffset + DescSz) : "<empty>";
+ Cursor.seek(alignTo(DescOffset + DescSz, 4));
+
+ if (!Cursor)
+ return createStringError(errc::executable_format_error,
+ "out of bounds while reading note section: %s",
+ toString(Cursor.takeError()).c_str());
+
+ if (Type == ELF::NT_GNU_BUILD_ID && Name.substr(0, 3) == "GNU" &&
+ DescSz) {
+ BuildIDSection = NoteSection;
+ BuildID = Desc;
+ BC.setFileBuildID(getPrintableBuildID(Desc));
+ BuildIDOffset = DescOffset;
+ BuildIDSize = DescSz;
+
+ return Error::success();
+ }
+ }
+ }
+
+ return Error::success();
+}
+
+Error BuildIDRewriter::postEmitFinalizer() {
+ if (!BuildIDSection || !BuildIDOffset)
+ return Error::success();
+
+ const uint8_t LastByte = BuildID[BuildID.size() - 1];
+ SmallVector<char, 1> Patch = {static_cast<char>(LastByte ^ 1)};
+ BuildIDSection->addPatch(*BuildIDOffset + BuildID.size() - 1, Patch);
+ BC.outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
+
+ return Error::success();
+}
+} // namespace
+
+std::unique_ptr<MetadataRewriter>
+llvm::bolt::createBuildIDRewriter(BinaryContext &BC) {
+ return std::make_unique<BuildIDRewriter>("build-id-rewriter", BC);
+}
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index 578f1763bfe4e..34993af2623bf 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -21,6 +21,7 @@ add_llvm_library(LLVMBOLTRewrite
LinuxKernelRewriter.cpp
MachORewriteInstance.cpp
MetadataManager.cpp
+ BuildIDRewriter.cpp
PseudoProbeRewriter.cpp
RewriteInstance.cpp
SDTRewriter.cpp
diff --git a/bolt/lib/Rewrite/MetadataManager.cpp b/bolt/lib/Rewrite/MetadataManager.cpp
index 4ce44820d9eca..713d2e47b6efa 100644
--- a/bolt/lib/Rewrite/MetadataManager.cpp
+++ b/bolt/lib/Rewrite/MetadataManager.cpp
@@ -20,6 +20,18 @@ void MetadataManager::registerRewriter(
Rewriters.emplace_back(std::move(Rewriter));
}
+void MetadataManager::runSectionInitializers() {
+ for (auto &Rewriter : Rewriters) {
+ LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
+ << " after reading sections\n");
+ if (Error E = Rewriter->sectionInitializer()) {
+ errs() << "BOLT-ERROR: while running " << Rewriter->getName()
+ << " after reading sections: " << toString(std::move(E)) << '\n';
+ exit(1);
+ }
+ }
+}
+
void MetadataManager::runInitializersPreCFG() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index e452e956c949e..1a3a8af21d81b 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -643,82 +643,6 @@ Error RewriteInstance::discoverStorage() {
return Error::success();
}
-void RewriteInstance::parseBuildID() {
- if (!BuildIDSection)
- return;
-
- StringRef Buf = BuildIDSection->getContents();
-
- // Reading notes section (see Portable Formats Specification, Version 1.1,
- // pg 2-5, section "Note Section").
- DataExtractor DE =
- DataExtractor(Buf,
- /*IsLittleEndian=*/true, InputFile->getBytesInAddress());
- uint64_t Offset = 0;
- if (!DE.isValidOffset(Offset))
- return;
- uint32_t NameSz = DE.getU32(&Offset);
- if (!DE.isValidOffset(Offset))
- return;
- uint32_t DescSz = DE.getU32(&Offset);
- if (!DE.isValidOffset(Offset))
- return;
- uint32_t Type = DE.getU32(&Offset);
-
- LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
- << "; Type = " << Type << "\n");
-
- // Type 3 is a GNU build-id note section
- if (Type != 3)
- return;
-
- StringRef Name = Buf.slice(Offset, Offset + NameSz);
- Offset = alignTo(Offset + NameSz, 4);
- if (Name.substr(0, 3) != "GNU")
- return;
-
- BuildID = Buf.slice(Offset, Offset + DescSz);
-}
-
-std::optional<std::string> RewriteInstance::getPrintableBuildID() const {
- if (BuildID.empty())
- return std::nullopt;
-
- std::string Str;
- raw_string_ostream OS(Str);
- const unsigned char *CharIter = BuildID.bytes_begin();
- while (CharIter != BuildID.bytes_end()) {
- if (*CharIter < 0x10)
- OS << "0";
- OS << Twine::utohexstr(*CharIter);
- ++CharIter;
- }
- return OS.str();
-}
-
-void RewriteInstance::patchBuildID() {
- raw_fd_ostream &OS = Out->os();
-
- if (BuildID.empty())
- return;
-
- size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
- assert(IDOffset != StringRef::npos && "failed to patch build-id");
-
- uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
- if (!FileOffset) {
- BC->errs()
- << "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
- return;
- }
-
- char LastIDByte = BuildID[BuildID.size() - 1];
- LastIDByte ^= 1;
- OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);
-
- BC->outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
-}
-
Error RewriteInstance::run() {
assert(BC && "failed to create a binary context");
@@ -1977,7 +1901,6 @@ Error RewriteInstance::readSpecialSections() {
".rela" + std::string(BC->getMainCodeSectionName()));
HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
- BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");
if (ErrorOr<BinarySection &> BATSec =
BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
@@ -2035,10 +1958,7 @@ Error RewriteInstance::readSpecialSections() {
report_error("expected valid eh_frame section", EHFrameOrError.takeError());
CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get()));
- // Parse build-id
- parseBuildID();
- if (std::optional<std::string> FileBuildID = getPrintableBuildID())
- BC->setFileBuildID(*FileBuildID);
+ processSectionMetadata();
// Read .dynamic/PT_DYNAMIC.
return readELFDynamic();
@@ -3218,14 +3138,20 @@ void RewriteInstance::initializeMetadataManager() {
if (BC->IsLinuxKernel)
MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));
+ MetadataManager.registerRewriter(createBuildIDRewriter(*BC));
+
MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));
MetadataManager.registerRewriter(createSDTRewriter(*BC));
}
-void RewriteInstance::processMetadataPreCFG() {
+void RewriteInstance::processSectionMetadata() {
initializeMetadataManager();
+ MetadataManager.runSectionInitializers();
+}
+
+void RewriteInstance::processMetadataPreCFG() {
MetadataManager.runInitializersPreCFG();
processProfileDataPreCFG();
@@ -5772,8 +5698,6 @@ void RewriteInstance::rewriteFile() {
// Update symbol tables.
patchELFSymTabs();
- patchBuildID();
-
if (opts::EnableBAT)
encodeBATSection();
``````````
</details>
https://github.com/llvm/llvm-project/pull/94273
More information about the llvm-commits
mailing list