[llvm] [BOLT] Refactor MCInstReference and move it to Core (NFC) (PR #155846)
Anatoly Trosinenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 08:55:33 PDT 2025
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/155846
>From 4a0d3e8632ee2e77cb64c0fffbbde2e6bcd15c54 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 27 Aug 2025 14:19:30 +0300
Subject: [PATCH 1/8] [BOLT] Refactor MCInstReference and move it to Core (NFC)
(#138655)
Refactor MCInstReference class and move it from PAuth gadget scanner to
Core.
MCInstReference is a class representing a constant reference to an
instruction inside a parent entity - either inside a basic block (which
has a reference to its parent function) or directly inside a function
(when CFG information is not available).
---
bolt/include/bolt/Core/MCInstUtils.h | 155 +++++++++++++++
bolt/include/bolt/Passes/PAuthGadgetScanner.h | 176 +-----------------
bolt/lib/Core/CMakeLists.txt | 1 +
bolt/lib/Core/MCInstUtils.cpp | 56 ++++++
bolt/lib/Passes/PAuthGadgetScanner.cpp | 102 +++++-----
5 files changed, 256 insertions(+), 234 deletions(-)
create mode 100644 bolt/include/bolt/Core/MCInstUtils.h
create mode 100644 bolt/lib/Core/MCInstUtils.cpp
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
new file mode 100644
index 0000000000000..2e93dfaf4c275
--- /dev/null
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -0,0 +1,155 @@
+//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_CORE_MCINSTUTILS_H
+#define BOLT_CORE_MCINSTUTILS_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+#include <map>
+#include <variant>
+
+namespace llvm {
+namespace bolt {
+
+class BinaryFunction;
+
+/// MCInstReference represents a reference to a constant MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created).
+class MCInstReference {
+ using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
+
+ // Two cases are possible:
+ // * functions with CFG reconstructed - a function stores a collection of
+ // basic blocks, each basic block stores a contiguous vector of MCInst
+ // * functions without CFG - there are no basic blocks created,
+ // the instructions are directly stored in std::map in BinaryFunction
+ //
+ // In both cases, the direct parent of MCInst is stored together with an
+ // iterator pointing to the instruction.
+
+ // Helper struct: CFG is available, the direct parent is a basic block,
+ // iterator's type is `MCInst *`.
+ struct RefInBB {
+ RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+ : BB(BB), It(Inst) {}
+ RefInBB(const RefInBB &Other) = default;
+ RefInBB &operator=(const RefInBB &Other) = default;
+
+ const BinaryBasicBlock *BB;
+ BinaryBasicBlock::const_iterator It;
+
+ bool operator==(const RefInBB &Other) const {
+ return BB == Other.BB && It == Other.It;
+ }
+ };
+
+ // Helper struct: CFG is *not* available, the direct parent is a function,
+ // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+ // is an instruction's offset).
+ struct RefInBF {
+ RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+ : BF(BF), It(It) {}
+ RefInBF(const RefInBF &Other) = default;
+ RefInBF &operator=(const RefInBF &Other) = default;
+
+ const BinaryFunction *BF;
+ nocfg_const_iterator It;
+
+ bool operator==(const RefInBF &Other) const {
+ return BF == Other.BF && It->first == Other.It->first;
+ }
+ };
+
+ std::variant<RefInBB, RefInBF> Reference;
+
+ // Utility methods to be used like this:
+ //
+ // if (auto *Ref = tryGetRefInBB())
+ // return Ref->doSomething(...);
+ // return getRefInBF().doSomethingElse(...);
+ const RefInBB *tryGetRefInBB() const {
+ assert(std::get_if<RefInBB>(&Reference) ||
+ std::get_if<RefInBF>(&Reference));
+ return std::get_if<RefInBB>(&Reference);
+ }
+ const RefInBF &getRefInBF() const {
+ assert(std::get_if<RefInBF>(&Reference));
+ return *std::get_if<RefInBF>(&Reference);
+ }
+
+public:
+ /// Constructs an empty reference.
+ MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+ /// Constructs a reference to the instruction inside the basic block.
+ MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
+ : Reference(RefInBB(BB, Inst)) {
+ assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+ }
+ /// Constructs a reference to the instruction inside the basic block.
+ MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
+ : Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
+ assert(BB && "Basic block should not be nullptr");
+ }
+ /// Constructs a reference to the instruction inside the function without
+ /// CFG information.
+ MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
+ : Reference(RefInBF(BF, It)) {
+ assert(BF && "Function should not be nullptr");
+ }
+
+ /// Locates an instruction inside a function and returns a reference.
+ static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
+
+ bool operator==(const MCInstReference &Other) const {
+ return Reference == Other.Reference;
+ }
+
+ const MCInst &getMCInst() const {
+ assert(!empty() && "Empty reference");
+ if (auto *Ref = tryGetRefInBB())
+ return *Ref->It;
+ return getRefInBF().It->second;
+ }
+
+ operator const MCInst &() const { return getMCInst(); }
+
+ bool empty() const {
+ if (auto *Ref = tryGetRefInBB())
+ return Ref->BB == nullptr;
+ return getRefInBF().BF == nullptr;
+ }
+
+ bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; }
+
+ const BinaryFunction *getFunction() const {
+ assert(!empty() && "Empty reference");
+ if (auto *Ref = tryGetRefInBB())
+ return Ref->BB->getFunction();
+ return getRefInBF().BF;
+ }
+
+ const BinaryBasicBlock *getBasicBlock() const {
+ assert(!empty() && "Empty reference");
+ if (auto *Ref = tryGetRefInBB())
+ return Ref->BB;
+ return nullptr;
+ }
+
+ raw_ostream &print(raw_ostream &OS) const;
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS,
+ const MCInstReference &Ref) {
+ return Ref.print(OS);
+}
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index 721fd664a3253..cb865a725d72a 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -11,187 +11,13 @@
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/MCInstUtils.h"
#include "bolt/Passes/BinaryPasses.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
namespace llvm {
namespace bolt {
-
-/// @brief MCInstReference represents a reference to an MCInst as stored either
-/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
-/// (after a CFG is created). It aims to store the necessary information to be
-/// able to find the specific MCInst in either the BinaryFunction or
-/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
-/// the corresponding instruction can be computed.
-
-struct MCInstInBBReference {
- BinaryBasicBlock *BB;
- int64_t BBIndex;
- MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
- : BB(BB), BBIndex(BBIndex) {}
- MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
- static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
- for (BinaryBasicBlock &BB : BF)
- for (size_t I = 0; I < BB.size(); ++I)
- if (Inst == &BB.getInstructionAtIndex(I))
- return MCInstInBBReference(&BB, I);
- return {};
- }
- bool operator==(const MCInstInBBReference &RHS) const {
- return BB == RHS.BB && BBIndex == RHS.BBIndex;
- }
- bool operator<(const MCInstInBBReference &RHS) const {
- return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
- }
- operator MCInst &() const {
- assert(BB != nullptr);
- return BB->getInstructionAtIndex(BBIndex);
- }
- uint64_t getAddress() const {
- // 4 bytes per instruction on AArch64.
- // FIXME: the assumption of 4 byte per instruction needs to be fixed before
- // this method gets used on any non-AArch64 binaries (but should be fine for
- // pac-ret analysis, as that is an AArch64-specific feature).
- return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
- }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
-
-struct MCInstInBFReference {
- BinaryFunction *BF;
- uint64_t Offset;
- MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
- : BF(BF), Offset(Offset) {}
-
- static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
- for (auto &I : BF.instrs())
- if (Inst == &I.second)
- return MCInstInBFReference(&BF, I.first);
- return {};
- }
-
- MCInstInBFReference() : BF(nullptr), Offset(0) {}
- bool operator==(const MCInstInBFReference &RHS) const {
- return BF == RHS.BF && Offset == RHS.Offset;
- }
- bool operator<(const MCInstInBFReference &RHS) const {
- return std::tie(BF, Offset) < std::tie(RHS.BF, RHS.Offset);
- }
- operator MCInst &() const {
- assert(BF != nullptr);
- return *BF->getInstructionAtOffset(Offset);
- }
-
- uint64_t getOffset() const { return Offset; }
-
- uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
-
-struct MCInstReference {
- enum Kind { FunctionParent, BasicBlockParent };
- Kind ParentKind;
- union U {
- MCInstInBBReference BBRef;
- MCInstInBFReference BFRef;
- U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
- U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
- } U;
- MCInstReference(MCInstInBBReference BBRef)
- : ParentKind(BasicBlockParent), U(BBRef) {}
- MCInstReference(MCInstInBFReference BFRef)
- : ParentKind(FunctionParent), U(BFRef) {}
- MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
- : MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
- MCInstReference(BinaryFunction *BF, uint32_t Offset)
- : MCInstReference(MCInstInBFReference(BF, Offset)) {}
-
- static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
- if (BF.hasCFG())
- return MCInstInBBReference::get(Inst, BF);
- return MCInstInBFReference::get(Inst, BF);
- }
-
- bool operator<(const MCInstReference &RHS) const {
- if (ParentKind != RHS.ParentKind)
- return ParentKind < RHS.ParentKind;
- switch (ParentKind) {
- case BasicBlockParent:
- return U.BBRef < RHS.U.BBRef;
- case FunctionParent:
- return U.BFRef < RHS.U.BFRef;
- }
- llvm_unreachable("");
- }
-
- bool operator==(const MCInstReference &RHS) const {
- if (ParentKind != RHS.ParentKind)
- return false;
- switch (ParentKind) {
- case BasicBlockParent:
- return U.BBRef == RHS.U.BBRef;
- case FunctionParent:
- return U.BFRef == RHS.U.BFRef;
- }
- llvm_unreachable("");
- }
-
- operator MCInst &() const {
- switch (ParentKind) {
- case BasicBlockParent:
- return U.BBRef;
- case FunctionParent:
- return U.BFRef;
- }
- llvm_unreachable("");
- }
-
- operator bool() const {
- switch (ParentKind) {
- case BasicBlockParent:
- return U.BBRef.BB != nullptr;
- case FunctionParent:
- return U.BFRef.BF != nullptr;
- }
- llvm_unreachable("");
- }
-
- uint64_t getAddress() const {
- switch (ParentKind) {
- case BasicBlockParent:
- return U.BBRef.getAddress();
- case FunctionParent:
- return U.BFRef.getAddress();
- }
- llvm_unreachable("");
- }
-
- BinaryFunction *getFunction() const {
- switch (ParentKind) {
- case FunctionParent:
- return U.BFRef.BF;
- case BasicBlockParent:
- return U.BBRef.BB->getFunction();
- }
- llvm_unreachable("");
- }
-
- BinaryBasicBlock *getBasicBlock() const {
- switch (ParentKind) {
- case FunctionParent:
- return nullptr;
- case BasicBlockParent:
- return U.BBRef.BB;
- }
- llvm_unreachable("");
- }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
-
namespace PAuthGadgetScanner {
// The report classes are designed to be used in an immutable manner.
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
index fc72dc023c590..58cfcab370f16 100644
--- a/bolt/lib/Core/CMakeLists.txt
+++ b/bolt/lib/Core/CMakeLists.txt
@@ -32,6 +32,7 @@ add_llvm_library(LLVMBOLTCore
GDBIndex.cpp
HashUtilities.cpp
JumpTable.cpp
+ MCInstUtils.cpp
MCPlusBuilder.cpp
ParallelUtilities.cpp
Relocation.cpp
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
new file mode 100644
index 0000000000000..3cdb9673d4dc0
--- /dev/null
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -0,0 +1,56 @@
+//===- bolt/Passes/MCInstUtils.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/MCInstUtils.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+
+#include <iterator>
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+MCInstReference MCInstReference::get(const MCInst *Inst,
+ const BinaryFunction &BF) {
+ if (BF.hasCFG()) {
+ for (BinaryBasicBlock &BB : BF)
+ for (MCInst &MI : BB)
+ if (&MI == Inst)
+ return MCInstReference(&BB, Inst);
+ return {};
+ }
+
+ for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
+ if (&I->second == Inst)
+ return MCInstReference(&BF, I);
+ }
+ return {};
+}
+
+raw_ostream &MCInstReference::print(raw_ostream &OS) const {
+ if (const RefInBB *Ref = tryGetRefInBB()) {
+ OS << "MCInstBBRef<";
+ if (Ref->BB == nullptr) {
+ OS << "BB:(null)";
+ } else {
+ unsigned IndexInBB = std::distance(Ref->BB->begin(), Ref->It);
+ OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
+ }
+ OS << ">";
+ return OS;
+ }
+
+ const RefInBF &Ref = getRefInBF();
+ OS << "MCInstBFRef<";
+ if (Ref.BF == nullptr)
+ OS << "BF:(null)";
+ else
+ OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.It->first;
+ OS << ">";
+ return OS;
+}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 65c84ebc8c4f4..5d884e2d37354 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -24,39 +24,6 @@
namespace llvm {
namespace bolt {
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
- OS << "MCInstBBRef<";
- if (Ref.BB == nullptr)
- OS << "BB:(null)";
- else
- OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
- OS << ">";
- return OS;
-}
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
- OS << "MCInstBFRef<";
- if (Ref.BF == nullptr)
- OS << "BF:(null)";
- else
- OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
- OS << ">";
- return OS;
-}
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
- switch (Ref.ParentKind) {
- case MCInstReference::BasicBlockParent:
- OS << Ref.U.BBRef;
- return OS;
- case MCInstReference::FunctionParent:
- OS << Ref.U.BFRef;
- return OS;
- }
- llvm_unreachable("");
-}
-
namespace PAuthGadgetScanner {
[[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
@@ -91,10 +58,10 @@ template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
if (BF.hasCFG()) {
for (BinaryBasicBlock &BB : BF)
for (int64_t I = 0, E = BB.size(); I < E; ++I)
- Fn(MCInstInBBReference(&BB, I));
+ Fn(MCInstReference(&BB, I));
} else {
- for (auto I : BF.instrs())
- Fn(MCInstInBFReference(&BF, I.first));
+ for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I)
+ Fn(MCInstReference(&BF, I));
}
}
@@ -566,7 +533,7 @@ class SrcSafetyAnalysis {
std::vector<MCInstReference> Result;
for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
MCInstReference Ref = MCInstReference::get(Inst, BF);
- assert(Ref && "Expected Inst to be found");
+ assert(!Ref.empty() && "Expected Inst to be found");
Result.push_back(Ref);
}
return Result;
@@ -1138,7 +1105,7 @@ class DstSafetyAnalysis {
std::vector<MCInstReference> Result;
for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
MCInstReference Ref = MCInstReference::get(Inst, BF);
- assert(Ref && "Expected Inst to be found");
+ assert(!Ref.empty() && "Expected Inst to be found");
Result.push_back(Ref);
}
return Result;
@@ -1345,8 +1312,7 @@ static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
// (such as isBranch at the time of writing this comment), some don't (such
// as isCall). For that reason, call MCInstrDesc's methods explicitly when
// it is important.
- const MCInstrDesc &Desc =
- BC.MII->get(static_cast<const MCInst &>(Inst).getOpcode());
+ const MCInstrDesc &Desc = BC.MII->get(Inst.getMCInst().getOpcode());
// Tail call should be a branch (but not necessarily an indirect one).
if (!Desc.isBranch())
return false;
@@ -1541,7 +1507,7 @@ void FunctionAnalysisContext::findUnsafeUses(
// This is printed as "[message] in function [name], basic block ...,
// at address ..." when the issue is reported to the user.
Reports.push_back(make_generic_report(
- MCInstReference::get(FirstInst, BF),
+ MCInstReference(&BB, FirstInst),
"Warning: possibly imprecise CFG, the analysis quality may be "
"degraded in this function. According to BOLT, unreachable code is "
"found" /* in function [name]... */));
@@ -1705,31 +1671,49 @@ void Analysis::runOnFunction(BinaryFunction &BF,
}
}
+// Compute the instruction address for printing (may be slow).
+static uint64_t getAddress(const MCInstReference &Inst) {
+ const BinaryFunction *BF = Inst.getFunction();
+
+ if (Inst.hasCFG()) {
+ const BinaryBasicBlock *BB = Inst.getBasicBlock();
+
+ auto It = static_cast<BinaryBasicBlock::const_iterator>(&Inst.getMCInst());
+ unsigned IndexInBB = std::distance(BB->begin(), It);
+
+ // FIXME: this assumes all instructions are 4 bytes in size. This is true
+ // for AArch64, but it might be good to extract this function so it can be
+ // used elsewhere and for other targets too.
+ return BF->getAddress() + BB->getOffset() + IndexInBB * 4;
+ }
+
+ for (auto I = BF->instrs().begin(), E = BF->instrs().end(); I != E; ++I) {
+ if (&I->second == &Inst.getMCInst())
+ return BF->getAddress() + I->first;
+ }
+ llvm_unreachable("Instruction not found in function");
+}
+
static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
size_t StartIndex = 0, size_t EndIndex = -1) {
if (EndIndex == (size_t)-1)
EndIndex = BB->size() - 1;
const BinaryFunction *BF = BB->getFunction();
for (unsigned I = StartIndex; I <= EndIndex; ++I) {
- // FIXME: this assumes all instructions are 4 bytes in size. This is true
- // for AArch64, but it might be good to extract this function so it can be
- // used elsewhere and for other targets too.
- uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I;
- const MCInst &Inst = BB->getInstructionAtIndex(I);
+ MCInstReference Inst(BB, I);
if (BC.MIB->isCFI(Inst))
continue;
- BC.printInstruction(outs(), Inst, Address, BF);
+ BC.printInstruction(outs(), Inst, getAddress(Inst), BF);
}
}
static void reportFoundGadgetInSingleBBSingleRelatedInst(
raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst,
const MCInstReference Location) {
- BinaryBasicBlock *BB = Location.getBasicBlock();
- assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent);
- assert(Location.ParentKind == MCInstReference::BasicBlockParent);
- MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef;
- if (BB == RelatedInstBB.BB) {
+ const BinaryBasicBlock *BB = Location.getBasicBlock();
+ assert(RelatedInst.hasCFG());
+ assert(Location.hasCFG());
+ if (BB == RelatedInst.getBasicBlock()) {
OS << " This happens in the following basic block:\n";
printBB(BC, BB);
}
@@ -1737,16 +1721,16 @@ static void reportFoundGadgetInSingleBBSingleRelatedInst(
void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
StringRef IssueKind) const {
- BinaryFunction *BF = Location.getFunction();
- BinaryBasicBlock *BB = Location.getBasicBlock();
+ const BinaryBasicBlock *BB = Location.getBasicBlock();
+ const BinaryFunction *BF = Location.getFunction();
OS << "\nGS-PAUTH: " << IssueKind;
OS << " in function " << BF->getPrintName();
if (BB)
OS << ", basic block " << BB->getName();
- OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n";
+ OS << ", at address " << llvm::format("%x", getAddress(Location)) << "\n";
OS << " The instruction is ";
- BC.printInstruction(OS, Location, Location.getAddress(), BF);
+ BC.printInstruction(OS, Location, getAddress(Location), BF);
}
void GadgetDiagnostic::generateReport(raw_ostream &OS,
@@ -1762,19 +1746,19 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
// Sort by address to ensure output is deterministic.
SmallVector<MCInstReference> RI(RelatedInstrs);
llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) {
- return A.getAddress() < B.getAddress();
+ return getAddress(A) < getAddress(B);
});
for (unsigned I = 0; I < RI.size(); ++I) {
MCInstReference InstRef = RI[I];
OS << " " << (I + 1) << ". ";
- BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF);
+ BC.printInstruction(OS, InstRef, getAddress(InstRef), &BF);
};
if (RelatedInstrs.size() == 1) {
const MCInstReference RelatedInst = RelatedInstrs[0];
// Printing the details for the MCInstReference::FunctionParent case
// is not implemented not to overcomplicate the code, as most functions
// are expected to have CFG information.
- if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent)
+ if (RelatedInst.hasCFG())
reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst,
Location);
}
>From eb32eb8a1c3e6163d4efa606ce8bdd96dc73dd5a Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 28 Aug 2025 15:46:05 +0300
Subject: [PATCH 2/8] Move public members to the top of MCInstReference, make
nocfg_const_iterator public
---
bolt/include/bolt/Core/MCInstUtils.h | 121 ++++++++++++++-------------
1 file changed, 61 insertions(+), 60 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 2e93dfaf4c275..82660573afaf3 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -22,68 +22,9 @@ class BinaryFunction;
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
/// (after a CFG is created).
class MCInstReference {
+public:
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
- // Two cases are possible:
- // * functions with CFG reconstructed - a function stores a collection of
- // basic blocks, each basic block stores a contiguous vector of MCInst
- // * functions without CFG - there are no basic blocks created,
- // the instructions are directly stored in std::map in BinaryFunction
- //
- // In both cases, the direct parent of MCInst is stored together with an
- // iterator pointing to the instruction.
-
- // Helper struct: CFG is available, the direct parent is a basic block,
- // iterator's type is `MCInst *`.
- struct RefInBB {
- RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
- : BB(BB), It(Inst) {}
- RefInBB(const RefInBB &Other) = default;
- RefInBB &operator=(const RefInBB &Other) = default;
-
- const BinaryBasicBlock *BB;
- BinaryBasicBlock::const_iterator It;
-
- bool operator==(const RefInBB &Other) const {
- return BB == Other.BB && It == Other.It;
- }
- };
-
- // Helper struct: CFG is *not* available, the direct parent is a function,
- // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
- // is an instruction's offset).
- struct RefInBF {
- RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
- : BF(BF), It(It) {}
- RefInBF(const RefInBF &Other) = default;
- RefInBF &operator=(const RefInBF &Other) = default;
-
- const BinaryFunction *BF;
- nocfg_const_iterator It;
-
- bool operator==(const RefInBF &Other) const {
- return BF == Other.BF && It->first == Other.It->first;
- }
- };
-
- std::variant<RefInBB, RefInBF> Reference;
-
- // Utility methods to be used like this:
- //
- // if (auto *Ref = tryGetRefInBB())
- // return Ref->doSomething(...);
- // return getRefInBF().doSomethingElse(...);
- const RefInBB *tryGetRefInBB() const {
- assert(std::get_if<RefInBB>(&Reference) ||
- std::get_if<RefInBF>(&Reference));
- return std::get_if<RefInBB>(&Reference);
- }
- const RefInBF &getRefInBF() const {
- assert(std::get_if<RefInBF>(&Reference));
- return *std::get_if<RefInBF>(&Reference);
- }
-
-public:
/// Constructs an empty reference.
MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
/// Constructs a reference to the instruction inside the basic block.
@@ -142,6 +83,66 @@ class MCInstReference {
}
raw_ostream &print(raw_ostream &OS) const;
+
+private:
+ // Two cases are possible:
+ // * functions with CFG reconstructed - a function stores a collection of
+ // basic blocks, each basic block stores a contiguous vector of MCInst
+ // * functions without CFG - there are no basic blocks created,
+ // the instructions are directly stored in std::map in BinaryFunction
+ //
+ // In both cases, the direct parent of MCInst is stored together with an
+ // iterator pointing to the instruction.
+
+ // Helper struct: CFG is available, the direct parent is a basic block,
+ // iterator's type is `MCInst *`.
+ struct RefInBB {
+ RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+ : BB(BB), It(Inst) {}
+ RefInBB(const RefInBB &Other) = default;
+ RefInBB &operator=(const RefInBB &Other) = default;
+
+ const BinaryBasicBlock *BB;
+ BinaryBasicBlock::const_iterator It;
+
+ bool operator==(const RefInBB &Other) const {
+ return BB == Other.BB && It == Other.It;
+ }
+ };
+
+ // Helper struct: CFG is *not* available, the direct parent is a function,
+ // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+ // is an instruction's offset).
+ struct RefInBF {
+ RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+ : BF(BF), It(It) {}
+ RefInBF(const RefInBF &Other) = default;
+ RefInBF &operator=(const RefInBF &Other) = default;
+
+ const BinaryFunction *BF;
+ nocfg_const_iterator It;
+
+ bool operator==(const RefInBF &Other) const {
+ return BF == Other.BF && It->first == Other.It->first;
+ }
+ };
+
+ std::variant<RefInBB, RefInBF> Reference;
+
+ // Utility methods to be used like this:
+ //
+ // if (auto *Ref = tryGetRefInBB())
+ // return Ref->doSomething(...);
+ // return getRefInBF().doSomethingElse(...);
+ const RefInBB *tryGetRefInBB() const {
+ assert(std::get_if<RefInBB>(&Reference) ||
+ std::get_if<RefInBF>(&Reference));
+ return std::get_if<RefInBB>(&Reference);
+ }
+ const RefInBF &getRefInBF() const {
+ assert(std::get_if<RefInBF>(&Reference));
+ return *std::get_if<RefInBF>(&Reference);
+ }
};
static inline raw_ostream &operator<<(raw_ostream &OS,
>From 3d8ac8c8cd8d5015b4f5b4a1dff6c7455e1401fd Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 28 Aug 2025 16:19:56 +0300
Subject: [PATCH 3/8] Fix iterating over the instructions of BinaryBasicBlock
---
bolt/include/bolt/Core/MCInstUtils.h | 15 ++++++--
bolt/lib/Core/MCInstUtils.cpp | 52 +++++++++++++++++++++++++-
bolt/lib/Passes/PAuthGadgetScanner.cpp | 33 +++-------------
3 files changed, 66 insertions(+), 34 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 82660573afaf3..a2797c6b31aa2 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -13,6 +13,10 @@
#include <map>
#include <variant>
+namespace llvm {
+class MCCodeEmitter;
+}
+
namespace llvm {
namespace bolt {
@@ -54,7 +58,7 @@ class MCInstReference {
const MCInst &getMCInst() const {
assert(!empty() && "Empty reference");
if (auto *Ref = tryGetRefInBB())
- return *Ref->It;
+ return *Ref->Inst;
return getRefInBF().It->second;
}
@@ -82,6 +86,9 @@ class MCInstReference {
return nullptr;
}
+ // MCCodeEmitter is not thread safe.
+ uint64_t getAddress(const MCCodeEmitter *Emitter = nullptr) const;
+
raw_ostream &print(raw_ostream &OS) const;
private:
@@ -98,15 +105,15 @@ class MCInstReference {
// iterator's type is `MCInst *`.
struct RefInBB {
RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
- : BB(BB), It(Inst) {}
+ : BB(BB), Inst(Inst) {}
RefInBB(const RefInBB &Other) = default;
RefInBB &operator=(const RefInBB &Other) = default;
const BinaryBasicBlock *BB;
- BinaryBasicBlock::const_iterator It;
+ const MCInst *Inst;
bool operator==(const RefInBB &Other) const {
- return BB == Other.BB && It == Other.It;
+ return BB == Other.BB && Inst == Other.Inst;
}
};
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 3cdb9673d4dc0..f721c259e9b28 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -9,12 +9,33 @@
#include "bolt/Core/MCInstUtils.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
+#include "llvm/ADT/iterator.h"
-#include <iterator>
+#include <type_traits>
using namespace llvm;
using namespace llvm::bolt;
+// It is assumed in a few places that BinaryBasicBlock stores its instructions
+// in a contiguous vector. Give this assumption a name to simplify marking the
+// particular places with static_assert.
+using BasicBlockStorageIsVector =
+ std::is_same<BinaryBasicBlock::const_iterator,
+ std::vector<MCInst>::const_iterator>;
+
+namespace {
+// Cannot reuse MCPlusBuilder::InstructionIterator because it has to be
+// constructed from a non-const std::map iterator.
+class mapped_mcinst_iterator
+ : public iterator_adaptor_base<mapped_mcinst_iterator,
+ MCInstReference::nocfg_const_iterator> {
+public:
+ mapped_mcinst_iterator(MCInstReference::nocfg_const_iterator It)
+ : iterator_adaptor_base(It) {}
+ const MCInst &operator*() const { return this->I->second; }
+};
+} // anonymous namespace
+
MCInstReference MCInstReference::get(const MCInst *Inst,
const BinaryFunction &BF) {
if (BF.hasCFG()) {
@@ -32,13 +53,40 @@ MCInstReference MCInstReference::get(const MCInst *Inst,
return {};
}
+uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
+ assert(!empty() && "Taking instruction address by empty reference");
+
+ const BinaryContext &BC = getFunction()->getBinaryContext();
+ if (auto *Ref = tryGetRefInBB()) {
+ static_assert(BasicBlockStorageIsVector::value,
+ "Cannot use 'const MCInst *' as iterator type");
+ uint64_t AddressOfBB = getFunction()->getAddress() + Ref->BB->getOffset();
+ const MCInst *FirstInstInBB = &*Ref->BB->begin();
+
+ uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, Ref->Inst, Emitter);
+
+ return AddressOfBB + OffsetInBB;
+ }
+
+ auto &Ref = getRefInBF();
+ mapped_mcinst_iterator FirstInstInBF(Ref.BF->instrs().begin());
+ mapped_mcinst_iterator ThisInst(Ref.It);
+
+ uint64_t OffsetInBF = BC.computeCodeSize(FirstInstInBF, ThisInst, Emitter);
+
+ return getFunction()->getAddress() + OffsetInBF;
+}
+
raw_ostream &MCInstReference::print(raw_ostream &OS) const {
if (const RefInBB *Ref = tryGetRefInBB()) {
OS << "MCInstBBRef<";
if (Ref->BB == nullptr) {
OS << "BB:(null)";
} else {
- unsigned IndexInBB = std::distance(Ref->BB->begin(), Ref->It);
+ static_assert(BasicBlockStorageIsVector::value,
+ "Cannot use pointer arithmetic on 'const MCInst *'");
+ const MCInst *FirstInstInBB = &*Ref->BB->begin();
+ unsigned IndexInBB = Ref->Inst - FirstInstInBB;
OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
}
OS << ">";
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 5d884e2d37354..287cbe31df3af 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1671,29 +1671,6 @@ void Analysis::runOnFunction(BinaryFunction &BF,
}
}
-// Compute the instruction address for printing (may be slow).
-static uint64_t getAddress(const MCInstReference &Inst) {
- const BinaryFunction *BF = Inst.getFunction();
-
- if (Inst.hasCFG()) {
- const BinaryBasicBlock *BB = Inst.getBasicBlock();
-
- auto It = static_cast<BinaryBasicBlock::const_iterator>(&Inst.getMCInst());
- unsigned IndexInBB = std::distance(BB->begin(), It);
-
- // FIXME: this assumes all instructions are 4 bytes in size. This is true
- // for AArch64, but it might be good to extract this function so it can be
- // used elsewhere and for other targets too.
- return BF->getAddress() + BB->getOffset() + IndexInBB * 4;
- }
-
- for (auto I = BF->instrs().begin(), E = BF->instrs().end(); I != E; ++I) {
- if (&I->second == &Inst.getMCInst())
- return BF->getAddress() + I->first;
- }
- llvm_unreachable("Instruction not found in function");
-}
-
static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
size_t StartIndex = 0, size_t EndIndex = -1) {
if (EndIndex == (size_t)-1)
@@ -1703,7 +1680,7 @@ static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
MCInstReference Inst(BB, I);
if (BC.MIB->isCFI(Inst))
continue;
- BC.printInstruction(outs(), Inst, getAddress(Inst), BF);
+ BC.printInstruction(outs(), Inst, Inst.getAddress(), BF);
}
}
@@ -1728,9 +1705,9 @@ void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
OS << " in function " << BF->getPrintName();
if (BB)
OS << ", basic block " << BB->getName();
- OS << ", at address " << llvm::format("%x", getAddress(Location)) << "\n";
+ OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n";
OS << " The instruction is ";
- BC.printInstruction(OS, Location, getAddress(Location), BF);
+ BC.printInstruction(OS, Location, Location.getAddress(), BF);
}
void GadgetDiagnostic::generateReport(raw_ostream &OS,
@@ -1746,12 +1723,12 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
// Sort by address to ensure output is deterministic.
SmallVector<MCInstReference> RI(RelatedInstrs);
llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) {
- return getAddress(A) < getAddress(B);
+ return A.getAddress() < B.getAddress();
});
for (unsigned I = 0; I < RI.size(); ++I) {
MCInstReference InstRef = RI[I];
OS << " " << (I + 1) << ". ";
- BC.printInstruction(OS, InstRef, getAddress(InstRef), &BF);
+ BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF);
};
if (RelatedInstrs.size() == 1) {
const MCInstReference RelatedInst = RelatedInstrs[0];
>From 2a25f3ac711f318195f92335333da3a2ac183421 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 17 Sep 2025 22:29:14 +0300
Subject: [PATCH 4/8] Use Index inside the BB instead of direct MCInst* in
RefInBB
---
bolt/include/bolt/Core/MCInstUtils.h | 40 ++++++++++++++++++----------
bolt/lib/Core/MCInstUtils.cpp | 22 +++++++--------
2 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index a2797c6b31aa2..bd3b3016f706f 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -25,20 +25,21 @@ class BinaryFunction;
/// MCInstReference represents a reference to a constant MCInst as stored either
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
/// (after a CFG is created).
+///
+/// The reference may be invalidated when the function containing the referenced
+/// instruction is modified.
class MCInstReference {
public:
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
/// Constructs an empty reference.
- MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+ MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {}
/// Constructs a reference to the instruction inside the basic block.
MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
- : Reference(RefInBB(BB, Inst)) {
- assert(BB && Inst && "Neither BB nor Inst should be nullptr");
- }
+ : Reference(RefInBB(BB, getInstIndexInBB(BB, Inst))) {}
/// Constructs a reference to the instruction inside the basic block.
MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
- : Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
+ : Reference(RefInBB(BB, Index)) {
assert(BB && "Basic block should not be nullptr");
}
/// Constructs a reference to the instruction inside the function without
@@ -57,8 +58,11 @@ class MCInstReference {
const MCInst &getMCInst() const {
assert(!empty() && "Empty reference");
- if (auto *Ref = tryGetRefInBB())
- return *Ref->Inst;
+ if (auto *Ref = tryGetRefInBB()) {
+ [[maybe_unused]] unsigned NumInstructions = Ref->BB->size();
+ assert(Ref->Index < NumInstructions && "Invalid reference");
+ return Ref->BB->getInstructionAtIndex(Ref->Index);
+ }
return getRefInBF().It->second;
}
@@ -92,6 +96,15 @@ class MCInstReference {
raw_ostream &print(raw_ostream &OS) const;
private:
+ static unsigned getInstIndexInBB(const BinaryBasicBlock *BB,
+ const MCInst *Inst) {
+ assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+ // Usage of pointer arithmetic assumes the instructions are stored in a
+ // vector, see BasicBlockStorageIsVector in MCInstUtils.cpp.
+ const MCInst *FirstInstInBB = &*BB->begin();
+ return Inst - FirstInstInBB;
+ }
+
// Two cases are possible:
// * functions with CFG reconstructed - a function stores a collection of
// basic blocks, each basic block stores a contiguous vector of MCInst
@@ -99,21 +112,20 @@ class MCInstReference {
// the instructions are directly stored in std::map in BinaryFunction
//
// In both cases, the direct parent of MCInst is stored together with an
- // iterator pointing to the instruction.
+ // index or iterator pointing to the instruction.
- // Helper struct: CFG is available, the direct parent is a basic block,
- // iterator's type is `MCInst *`.
+ // Helper struct: CFG is available, the direct parent is a basic block.
struct RefInBB {
- RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
- : BB(BB), Inst(Inst) {}
+ RefInBB(const BinaryBasicBlock *BB, unsigned Index)
+ : BB(BB), Index(Index) {}
RefInBB(const RefInBB &Other) = default;
RefInBB &operator=(const RefInBB &Other) = default;
const BinaryBasicBlock *BB;
- const MCInst *Inst;
+ unsigned Index;
bool operator==(const RefInBB &Other) const {
- return BB == Other.BB && Inst == Other.Inst;
+ return BB == Other.BB && Index == Other.Index;
}
};
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index f721c259e9b28..2bd07882912f2 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -17,11 +17,11 @@ using namespace llvm;
using namespace llvm::bolt;
// It is assumed in a few places that BinaryBasicBlock stores its instructions
-// in a contiguous vector. Give this assumption a name to simplify marking the
-// particular places with static_assert.
+// in a contiguous vector.
using BasicBlockStorageIsVector =
std::is_same<BinaryBasicBlock::const_iterator,
std::vector<MCInst>::const_iterator>;
+static_assert(BasicBlockStorageIsVector::value);
namespace {
// Cannot reuse MCPlusBuilder::InstructionIterator because it has to be
@@ -58,12 +58,13 @@ uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
const BinaryContext &BC = getFunction()->getBinaryContext();
if (auto *Ref = tryGetRefInBB()) {
- static_assert(BasicBlockStorageIsVector::value,
- "Cannot use 'const MCInst *' as iterator type");
uint64_t AddressOfBB = getFunction()->getAddress() + Ref->BB->getOffset();
const MCInst *FirstInstInBB = &*Ref->BB->begin();
+ const MCInst *ThisInst = &getMCInst();
- uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, Ref->Inst, Emitter);
+ // Usage of plain 'const MCInst *' as iterators assumes the instructions
+ // are stored in a vector, see BasicBlockStorageIsVector.
+ uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, ThisInst, Emitter);
return AddressOfBB + OffsetInBB;
}
@@ -80,15 +81,10 @@ uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
raw_ostream &MCInstReference::print(raw_ostream &OS) const {
if (const RefInBB *Ref = tryGetRefInBB()) {
OS << "MCInstBBRef<";
- if (Ref->BB == nullptr) {
+ if (Ref->BB == nullptr)
OS << "BB:(null)";
- } else {
- static_assert(BasicBlockStorageIsVector::value,
- "Cannot use pointer arithmetic on 'const MCInst *'");
- const MCInst *FirstInstInBB = &*Ref->BB->begin();
- unsigned IndexInBB = Ref->Inst - FirstInstInBB;
- OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
- }
+ else
+ OS << "BB:" << Ref->BB->getName() << ":" << Ref->Index;
OS << ">";
return OS;
}
>From b06809883ed96d1c00793834bd05bcff51f032d9 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 14:40:36 +0300
Subject: [PATCH 5/8] Rename getAddress to computeAddress, add a comment
---
bolt/include/bolt/Core/MCInstUtils.h | 13 +++++++++++--
bolt/lib/Core/MCInstUtils.cpp | 2 +-
bolt/lib/Passes/PAuthGadgetScanner.cpp | 21 ++++++++++++---------
3 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index bd3b3016f706f..4226437463283 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -90,8 +90,17 @@ class MCInstReference {
return nullptr;
}
- // MCCodeEmitter is not thread safe.
- uint64_t getAddress(const MCCodeEmitter *Emitter = nullptr) const;
+ /// Computes the address of the instruction (or offset from base for PIC).
+ ///
+ /// This function is intended for the use cases like debug printing, as it
+ /// is only as precise as BinaryContext::computeCodeSize() is and requires
+ /// iterating over the prefix of the basic block (when CFG is available) or
+ /// of the function (when CFG is unavailable).
+ ///
+ /// MCCodeEmitter is not thread safe and the default instance from
+ /// BinaryContext is used by default, thus pass an instance explicitly if
+ /// this function may be called from multithreaded code.
+ uint64_t computeAddress(const MCCodeEmitter *Emitter = nullptr) const;
raw_ostream &print(raw_ostream &OS) const;
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 2bd07882912f2..bc17d1c306122 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -53,7 +53,7 @@ MCInstReference MCInstReference::get(const MCInst *Inst,
return {};
}
-uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
+uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const {
assert(!empty() && "Taking instruction address by empty reference");
const BinaryContext &BC = getFunction()->getBinaryContext();
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 287cbe31df3af..d29c817407eae 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1680,7 +1680,7 @@ static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
MCInstReference Inst(BB, I);
if (BC.MIB->isCFI(Inst))
continue;
- BC.printInstruction(outs(), Inst, Inst.getAddress(), BF);
+ BC.printInstruction(outs(), Inst, Inst.computeAddress(), BF);
}
}
@@ -1700,14 +1700,15 @@ void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
StringRef IssueKind) const {
const BinaryBasicBlock *BB = Location.getBasicBlock();
const BinaryFunction *BF = Location.getFunction();
+ const uint64_t Address = Location.computeAddress();
OS << "\nGS-PAUTH: " << IssueKind;
OS << " in function " << BF->getPrintName();
if (BB)
OS << ", basic block " << BB->getName();
- OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n";
+ OS << ", at address " << llvm::format("%x", Address) << "\n";
OS << " The instruction is ";
- BC.printInstruction(OS, Location, Location.getAddress(), BF);
+ BC.printInstruction(OS, Location, Address, BF);
}
void GadgetDiagnostic::generateReport(raw_ostream &OS,
@@ -1721,15 +1722,17 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
const BinaryContext &BC = BF.getBinaryContext();
// Sort by address to ensure output is deterministic.
- SmallVector<MCInstReference> RI(RelatedInstrs);
- llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) {
- return A.getAddress() < B.getAddress();
- });
+ SmallVector<std::pair<uint64_t, MCInstReference>> RI;
+ for (auto &InstRef : RelatedInstrs)
+ RI.push_back(std::make_pair(InstRef.computeAddress(), InstRef));
+ llvm::sort(RI, [](auto A, auto B) { return A.first < B.first; });
+
for (unsigned I = 0; I < RI.size(); ++I) {
- MCInstReference InstRef = RI[I];
+ auto [Address, InstRef] = RI[I];
OS << " " << (I + 1) << ". ";
- BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF);
+ BC.printInstruction(OS, InstRef, InstRef.computeAddress(), &BF);
};
+
if (RelatedInstrs.size() == 1) {
const MCInstReference RelatedInst = RelatedInstrs[0];
// Printing the details for the MCInstReference::FunctionParent case
>From 0558f1b3892087c43d8d2659cddc9aca6f3ef479 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 14:51:04 +0300
Subject: [PATCH 6/8] Use references instead of pointers expected to be
non-NULL
---
bolt/include/bolt/Core/MCInstUtils.h | 29 ++++++++++++--------------
bolt/lib/Core/MCInstUtils.cpp | 10 ++++-----
bolt/lib/Passes/PAuthGadgetScanner.cpp | 18 ++++++++--------
3 files changed, 27 insertions(+), 30 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 4226437463283..21ed2061a9edc 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -34,23 +34,21 @@ class MCInstReference {
/// Constructs an empty reference.
MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {}
+
/// Constructs a reference to the instruction inside the basic block.
- MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
- : Reference(RefInBB(BB, getInstIndexInBB(BB, Inst))) {}
+ MCInstReference(const BinaryBasicBlock &BB, const MCInst &Inst)
+ : Reference(RefInBB(&BB, getInstIndexInBB(BB, Inst))) {}
/// Constructs a reference to the instruction inside the basic block.
- MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
- : Reference(RefInBB(BB, Index)) {
- assert(BB && "Basic block should not be nullptr");
- }
+ MCInstReference(const BinaryBasicBlock &BB, unsigned Index)
+ : Reference(RefInBB(&BB, Index)) {}
+
/// Constructs a reference to the instruction inside the function without
/// CFG information.
- MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
- : Reference(RefInBF(BF, It)) {
- assert(BF && "Function should not be nullptr");
- }
+ MCInstReference(const BinaryFunction &BF, nocfg_const_iterator It)
+ : Reference(RefInBF(&BF, It)) {}
/// Locates an instruction inside a function and returns a reference.
- static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
+ static MCInstReference get(const MCInst &Inst, const BinaryFunction &BF);
bool operator==(const MCInstReference &Other) const {
return Reference == Other.Reference;
@@ -105,13 +103,12 @@ class MCInstReference {
raw_ostream &print(raw_ostream &OS) const;
private:
- static unsigned getInstIndexInBB(const BinaryBasicBlock *BB,
- const MCInst *Inst) {
- assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+ static unsigned getInstIndexInBB(const BinaryBasicBlock &BB,
+ const MCInst &Inst) {
// Usage of pointer arithmetic assumes the instructions are stored in a
// vector, see BasicBlockStorageIsVector in MCInstUtils.cpp.
- const MCInst *FirstInstInBB = &*BB->begin();
- return Inst - FirstInstInBB;
+ const MCInst *FirstInstInBB = &*BB.begin();
+ return &Inst - FirstInstInBB;
}
// Two cases are possible:
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index bc17d1c306122..6742dd4110775 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -36,19 +36,19 @@ class mapped_mcinst_iterator
};
} // anonymous namespace
-MCInstReference MCInstReference::get(const MCInst *Inst,
+MCInstReference MCInstReference::get(const MCInst &Inst,
const BinaryFunction &BF) {
if (BF.hasCFG()) {
for (BinaryBasicBlock &BB : BF)
for (MCInst &MI : BB)
- if (&MI == Inst)
- return MCInstReference(&BB, Inst);
+ if (&MI == &Inst)
+ return MCInstReference(BB, Inst);
return {};
}
for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
- if (&I->second == Inst)
- return MCInstReference(&BF, I);
+ if (&I->second == &Inst)
+ return MCInstReference(BF, I);
}
return {};
}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index d29c817407eae..e9520a9819939 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -58,10 +58,10 @@ template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
if (BF.hasCFG()) {
for (BinaryBasicBlock &BB : BF)
for (int64_t I = 0, E = BB.size(); I < E; ++I)
- Fn(MCInstReference(&BB, I));
+ Fn(MCInstReference(BB, I));
} else {
for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I)
- Fn(MCInstReference(&BF, I));
+ Fn(MCInstReference(BF, I));
}
}
@@ -532,7 +532,7 @@ class SrcSafetyAnalysis {
std::vector<MCInstReference> Result;
for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
- MCInstReference Ref = MCInstReference::get(Inst, BF);
+ MCInstReference Ref = MCInstReference::get(*Inst, BF);
assert(!Ref.empty() && "Expected Inst to be found");
Result.push_back(Ref);
}
@@ -1104,7 +1104,7 @@ class DstSafetyAnalysis {
std::vector<MCInstReference> Result;
for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
- MCInstReference Ref = MCInstReference::get(Inst, BF);
+ MCInstReference Ref = MCInstReference::get(*Inst, BF);
assert(!Ref.empty() && "Expected Inst to be found");
Result.push_back(Ref);
}
@@ -1507,7 +1507,7 @@ void FunctionAnalysisContext::findUnsafeUses(
// This is printed as "[message] in function [name], basic block ...,
// at address ..." when the issue is reported to the user.
Reports.push_back(make_generic_report(
- MCInstReference(&BB, FirstInst),
+ MCInstReference(BB, *FirstInst),
"Warning: possibly imprecise CFG, the analysis quality may be "
"degraded in this function. According to BOLT, unreachable code is "
"found" /* in function [name]... */));
@@ -1671,11 +1671,11 @@ void Analysis::runOnFunction(BinaryFunction &BF,
}
}
-static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
+static void printBB(const BinaryContext &BC, const BinaryBasicBlock &BB,
size_t StartIndex = 0, size_t EndIndex = -1) {
if (EndIndex == (size_t)-1)
- EndIndex = BB->size() - 1;
- const BinaryFunction *BF = BB->getFunction();
+ EndIndex = BB.size() - 1;
+ const BinaryFunction *BF = BB.getFunction();
for (unsigned I = StartIndex; I <= EndIndex; ++I) {
MCInstReference Inst(BB, I);
if (BC.MIB->isCFI(Inst))
@@ -1692,7 +1692,7 @@ static void reportFoundGadgetInSingleBBSingleRelatedInst(
assert(Location.hasCFG());
if (BB == RelatedInst.getBasicBlock()) {
OS << " This happens in the following basic block:\n";
- printBB(BC, BB);
+ printBB(BC, *BB);
}
}
>From 5a7414cf1b7ce5883af02cd6ab886614ed84f5c7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 15:54:34 +0300
Subject: [PATCH 7/8] Never return an empty reference from MCInstReference::get
---
bolt/lib/Core/MCInstUtils.cpp | 7 ++++---
bolt/lib/Passes/PAuthGadgetScanner.cpp | 14 ++++----------
2 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 6742dd4110775..6f6b9ef638424 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -39,18 +39,19 @@ class mapped_mcinst_iterator
MCInstReference MCInstReference::get(const MCInst &Inst,
const BinaryFunction &BF) {
if (BF.hasCFG()) {
- for (BinaryBasicBlock &BB : BF)
+ for (BinaryBasicBlock &BB : BF) {
for (MCInst &MI : BB)
if (&MI == &Inst)
return MCInstReference(BB, Inst);
- return {};
+ }
+ llvm_unreachable("Inst is not contained in BF");
}
for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
if (&I->second == &Inst)
return MCInstReference(BF, I);
}
- return {};
+ llvm_unreachable("Inst is not contained in BF");
}
uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index e9520a9819939..bd990d8201679 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -531,11 +531,8 @@ class SrcSafetyAnalysis {
const SrcState &S = getStateBefore(Inst);
std::vector<MCInstReference> Result;
- for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
- MCInstReference Ref = MCInstReference::get(*Inst, BF);
- assert(!Ref.empty() && "Expected Inst to be found");
- Result.push_back(Ref);
- }
+ for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg))
+ Result.push_back(MCInstReference::get(*Inst, BF));
return Result;
}
};
@@ -1103,11 +1100,8 @@ class DstSafetyAnalysis {
const DstState &S = getStateAfter(Inst);
std::vector<MCInstReference> Result;
- for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
- MCInstReference Ref = MCInstReference::get(*Inst, BF);
- assert(!Ref.empty() && "Expected Inst to be found");
- Result.push_back(Ref);
- }
+ for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg))
+ Result.push_back(MCInstReference::get(*Inst, BF));
return Result;
}
};
>From ec6bff9d37a2dba56bd38833b3d4c8be2b27f4e0 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Fri, 26 Sep 2025 18:55:01 +0300
Subject: [PATCH 8/8] Clarify the description of computeAddress()
---
bolt/include/bolt/Core/MCInstUtils.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 21ed2061a9edc..20682b3f57a13 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -88,7 +88,8 @@ class MCInstReference {
return nullptr;
}
- /// Computes the address of the instruction (or offset from base for PIC).
+ /// Computes the original address of the instruction (or offset from base
+ /// for PIC), assuming the containing function was not modified.
///
/// This function is intended for the use cases like debug printing, as it
/// is only as precise as BinaryContext::computeCodeSize() is and requires
More information about the llvm-commits
mailing list