[llvm] [BOLT] Refactor MCInstReference and move it to Core (NFC) (PR #155846)
    Anatoly Trosinenko via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon Sep 29 05:15:06 PDT 2025
    
    
  
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/155846
>From 4a0d3e8632ee2e77cb64c0fffbbde2e6bcd15c54 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 27 Aug 2025 14:19:30 +0300
Subject: [PATCH 1/9] [BOLT] Refactor MCInstReference and move it to Core (NFC)
 (#138655)
Refactor MCInstReference class and move it from PAuth gadget scanner to
Core.
MCInstReference is a class representing a constant reference to an
instruction inside a parent entity - either inside a basic block (which
has a reference to its parent function) or directly inside a function
(when CFG information is not available).
---
 bolt/include/bolt/Core/MCInstUtils.h          | 155 +++++++++++++++
 bolt/include/bolt/Passes/PAuthGadgetScanner.h | 176 +-----------------
 bolt/lib/Core/CMakeLists.txt                  |   1 +
 bolt/lib/Core/MCInstUtils.cpp                 |  56 ++++++
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 102 +++++-----
 5 files changed, 256 insertions(+), 234 deletions(-)
 create mode 100644 bolt/include/bolt/Core/MCInstUtils.h
 create mode 100644 bolt/lib/Core/MCInstUtils.cpp
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
new file mode 100644
index 0000000000000..2e93dfaf4c275
--- /dev/null
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -0,0 +1,155 @@
+//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_CORE_MCINSTUTILS_H
+#define BOLT_CORE_MCINSTUTILS_H
+
+#include "bolt/Core/BinaryBasicBlock.h"
+#include <map>
+#include <variant>
+
+namespace llvm {
+namespace bolt {
+
+class BinaryFunction;
+
+/// MCInstReference represents a reference to a constant MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created).
+class MCInstReference {
+  using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
+
+  // Two cases are possible:
+  // * functions with CFG reconstructed - a function stores a collection of
+  //   basic blocks, each basic block stores a contiguous vector of MCInst
+  // * functions without CFG - there are no basic blocks created,
+  //   the instructions are directly stored in std::map in BinaryFunction
+  //
+  // In both cases, the direct parent of MCInst is stored together with an
+  // iterator pointing to the instruction.
+
+  // Helper struct: CFG is available, the direct parent is a basic block,
+  // iterator's type is `MCInst *`.
+  struct RefInBB {
+    RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+        : BB(BB), It(Inst) {}
+    RefInBB(const RefInBB &Other) = default;
+    RefInBB &operator=(const RefInBB &Other) = default;
+
+    const BinaryBasicBlock *BB;
+    BinaryBasicBlock::const_iterator It;
+
+    bool operator==(const RefInBB &Other) const {
+      return BB == Other.BB && It == Other.It;
+    }
+  };
+
+  // Helper struct: CFG is *not* available, the direct parent is a function,
+  // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+  // is an instruction's offset).
+  struct RefInBF {
+    RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+        : BF(BF), It(It) {}
+    RefInBF(const RefInBF &Other) = default;
+    RefInBF &operator=(const RefInBF &Other) = default;
+
+    const BinaryFunction *BF;
+    nocfg_const_iterator It;
+
+    bool operator==(const RefInBF &Other) const {
+      return BF == Other.BF && It->first == Other.It->first;
+    }
+  };
+
+  std::variant<RefInBB, RefInBF> Reference;
+
+  // Utility methods to be used like this:
+  //
+  //     if (auto *Ref = tryGetRefInBB())
+  //       return Ref->doSomething(...);
+  //     return getRefInBF().doSomethingElse(...);
+  const RefInBB *tryGetRefInBB() const {
+    assert(std::get_if<RefInBB>(&Reference) ||
+           std::get_if<RefInBF>(&Reference));
+    return std::get_if<RefInBB>(&Reference);
+  }
+  const RefInBF &getRefInBF() const {
+    assert(std::get_if<RefInBF>(&Reference));
+    return *std::get_if<RefInBF>(&Reference);
+  }
+
+public:
+  /// Constructs an empty reference.
+  MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
+      : Reference(RefInBB(BB, Inst)) {
+    assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+  }
+  /// Constructs a reference to the instruction inside the basic block.
+  MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
+      : Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
+    assert(BB && "Basic block should not be nullptr");
+  }
+  /// Constructs a reference to the instruction inside the function without
+  /// CFG information.
+  MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
+      : Reference(RefInBF(BF, It)) {
+    assert(BF && "Function should not be nullptr");
+  }
+
+  /// Locates an instruction inside a function and returns a reference.
+  static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
+
+  bool operator==(const MCInstReference &Other) const {
+    return Reference == Other.Reference;
+  }
+
+  const MCInst &getMCInst() const {
+    assert(!empty() && "Empty reference");
+    if (auto *Ref = tryGetRefInBB())
+      return *Ref->It;
+    return getRefInBF().It->second;
+  }
+
+  operator const MCInst &() const { return getMCInst(); }
+
+  bool empty() const {
+    if (auto *Ref = tryGetRefInBB())
+      return Ref->BB == nullptr;
+    return getRefInBF().BF == nullptr;
+  }
+
+  bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; }
+
+  const BinaryFunction *getFunction() const {
+    assert(!empty() && "Empty reference");
+    if (auto *Ref = tryGetRefInBB())
+      return Ref->BB->getFunction();
+    return getRefInBF().BF;
+  }
+
+  const BinaryBasicBlock *getBasicBlock() const {
+    assert(!empty() && "Empty reference");
+    if (auto *Ref = tryGetRefInBB())
+      return Ref->BB;
+    return nullptr;
+  }
+
+  raw_ostream &print(raw_ostream &OS) const;
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS,
+                                      const MCInstReference &Ref) {
+  return Ref.print(OS);
+}
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index 721fd664a3253..cb865a725d72a 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -11,187 +11,13 @@
 
 #include "bolt/Core/BinaryContext.h"
 #include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/MCInstUtils.h"
 #include "bolt/Passes/BinaryPasses.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>
 
 namespace llvm {
 namespace bolt {
-
-/// @brief  MCInstReference represents a reference to an MCInst as stored either
-/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
-/// (after a CFG is created). It aims to store the necessary information to be
-/// able to find the specific MCInst in either the BinaryFunction or
-/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
-/// the corresponding instruction can be computed.
-
-struct MCInstInBBReference {
-  BinaryBasicBlock *BB;
-  int64_t BBIndex;
-  MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
-      : BB(BB), BBIndex(BBIndex) {}
-  MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
-  static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
-    for (BinaryBasicBlock &BB : BF)
-      for (size_t I = 0; I < BB.size(); ++I)
-        if (Inst == &BB.getInstructionAtIndex(I))
-          return MCInstInBBReference(&BB, I);
-    return {};
-  }
-  bool operator==(const MCInstInBBReference &RHS) const {
-    return BB == RHS.BB && BBIndex == RHS.BBIndex;
-  }
-  bool operator<(const MCInstInBBReference &RHS) const {
-    return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
-  }
-  operator MCInst &() const {
-    assert(BB != nullptr);
-    return BB->getInstructionAtIndex(BBIndex);
-  }
-  uint64_t getAddress() const {
-    // 4 bytes per instruction on AArch64.
-    // FIXME: the assumption of 4 byte per instruction needs to be fixed before
-    // this method gets used on any non-AArch64 binaries (but should be fine for
-    // pac-ret analysis, as that is an AArch64-specific feature).
-    return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
-  }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
-
-struct MCInstInBFReference {
-  BinaryFunction *BF;
-  uint64_t Offset;
-  MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
-      : BF(BF), Offset(Offset) {}
-
-  static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
-    for (auto &I : BF.instrs())
-      if (Inst == &I.second)
-        return MCInstInBFReference(&BF, I.first);
-    return {};
-  }
-
-  MCInstInBFReference() : BF(nullptr), Offset(0) {}
-  bool operator==(const MCInstInBFReference &RHS) const {
-    return BF == RHS.BF && Offset == RHS.Offset;
-  }
-  bool operator<(const MCInstInBFReference &RHS) const {
-    return std::tie(BF, Offset) < std::tie(RHS.BF, RHS.Offset);
-  }
-  operator MCInst &() const {
-    assert(BF != nullptr);
-    return *BF->getInstructionAtOffset(Offset);
-  }
-
-  uint64_t getOffset() const { return Offset; }
-
-  uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
-
-struct MCInstReference {
-  enum Kind { FunctionParent, BasicBlockParent };
-  Kind ParentKind;
-  union U {
-    MCInstInBBReference BBRef;
-    MCInstInBFReference BFRef;
-    U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
-    U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
-  } U;
-  MCInstReference(MCInstInBBReference BBRef)
-      : ParentKind(BasicBlockParent), U(BBRef) {}
-  MCInstReference(MCInstInBFReference BFRef)
-      : ParentKind(FunctionParent), U(BFRef) {}
-  MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
-      : MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
-  MCInstReference(BinaryFunction *BF, uint32_t Offset)
-      : MCInstReference(MCInstInBFReference(BF, Offset)) {}
-
-  static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
-    if (BF.hasCFG())
-      return MCInstInBBReference::get(Inst, BF);
-    return MCInstInBFReference::get(Inst, BF);
-  }
-
-  bool operator<(const MCInstReference &RHS) const {
-    if (ParentKind != RHS.ParentKind)
-      return ParentKind < RHS.ParentKind;
-    switch (ParentKind) {
-    case BasicBlockParent:
-      return U.BBRef < RHS.U.BBRef;
-    case FunctionParent:
-      return U.BFRef < RHS.U.BFRef;
-    }
-    llvm_unreachable("");
-  }
-
-  bool operator==(const MCInstReference &RHS) const {
-    if (ParentKind != RHS.ParentKind)
-      return false;
-    switch (ParentKind) {
-    case BasicBlockParent:
-      return U.BBRef == RHS.U.BBRef;
-    case FunctionParent:
-      return U.BFRef == RHS.U.BFRef;
-    }
-    llvm_unreachable("");
-  }
-
-  operator MCInst &() const {
-    switch (ParentKind) {
-    case BasicBlockParent:
-      return U.BBRef;
-    case FunctionParent:
-      return U.BFRef;
-    }
-    llvm_unreachable("");
-  }
-
-  operator bool() const {
-    switch (ParentKind) {
-    case BasicBlockParent:
-      return U.BBRef.BB != nullptr;
-    case FunctionParent:
-      return U.BFRef.BF != nullptr;
-    }
-    llvm_unreachable("");
-  }
-
-  uint64_t getAddress() const {
-    switch (ParentKind) {
-    case BasicBlockParent:
-      return U.BBRef.getAddress();
-    case FunctionParent:
-      return U.BFRef.getAddress();
-    }
-    llvm_unreachable("");
-  }
-
-  BinaryFunction *getFunction() const {
-    switch (ParentKind) {
-    case FunctionParent:
-      return U.BFRef.BF;
-    case BasicBlockParent:
-      return U.BBRef.BB->getFunction();
-    }
-    llvm_unreachable("");
-  }
-
-  BinaryBasicBlock *getBasicBlock() const {
-    switch (ParentKind) {
-    case FunctionParent:
-      return nullptr;
-    case BasicBlockParent:
-      return U.BBRef.BB;
-    }
-    llvm_unreachable("");
-  }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
-
 namespace PAuthGadgetScanner {
 
 // The report classes are designed to be used in an immutable manner.
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
index fc72dc023c590..58cfcab370f16 100644
--- a/bolt/lib/Core/CMakeLists.txt
+++ b/bolt/lib/Core/CMakeLists.txt
@@ -32,6 +32,7 @@ add_llvm_library(LLVMBOLTCore
   GDBIndex.cpp
   HashUtilities.cpp
   JumpTable.cpp
+  MCInstUtils.cpp
   MCPlusBuilder.cpp
   ParallelUtilities.cpp
   Relocation.cpp
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
new file mode 100644
index 0000000000000..3cdb9673d4dc0
--- /dev/null
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -0,0 +1,56 @@
+//===- bolt/Passes/MCInstUtils.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/MCInstUtils.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+
+#include <iterator>
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+MCInstReference MCInstReference::get(const MCInst *Inst,
+                                     const BinaryFunction &BF) {
+  if (BF.hasCFG()) {
+    for (BinaryBasicBlock &BB : BF)
+      for (MCInst &MI : BB)
+        if (&MI == Inst)
+          return MCInstReference(&BB, Inst);
+    return {};
+  }
+
+  for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
+    if (&I->second == Inst)
+      return MCInstReference(&BF, I);
+  }
+  return {};
+}
+
+raw_ostream &MCInstReference::print(raw_ostream &OS) const {
+  if (const RefInBB *Ref = tryGetRefInBB()) {
+    OS << "MCInstBBRef<";
+    if (Ref->BB == nullptr) {
+      OS << "BB:(null)";
+    } else {
+      unsigned IndexInBB = std::distance(Ref->BB->begin(), Ref->It);
+      OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
+    }
+    OS << ">";
+    return OS;
+  }
+
+  const RefInBF &Ref = getRefInBF();
+  OS << "MCInstBFRef<";
+  if (Ref.BF == nullptr)
+    OS << "BF:(null)";
+  else
+    OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.It->first;
+  OS << ">";
+  return OS;
+}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 65c84ebc8c4f4..5d884e2d37354 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -24,39 +24,6 @@
 
 namespace llvm {
 namespace bolt {
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
-  OS << "MCInstBBRef<";
-  if (Ref.BB == nullptr)
-    OS << "BB:(null)";
-  else
-    OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
-  OS << ">";
-  return OS;
-}
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
-  OS << "MCInstBFRef<";
-  if (Ref.BF == nullptr)
-    OS << "BF:(null)";
-  else
-    OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
-  OS << ">";
-  return OS;
-}
-
-raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
-  switch (Ref.ParentKind) {
-  case MCInstReference::BasicBlockParent:
-    OS << Ref.U.BBRef;
-    return OS;
-  case MCInstReference::FunctionParent:
-    OS << Ref.U.BFRef;
-    return OS;
-  }
-  llvm_unreachable("");
-}
-
 namespace PAuthGadgetScanner {
 
 [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label,
@@ -91,10 +58,10 @@ template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
   if (BF.hasCFG()) {
     for (BinaryBasicBlock &BB : BF)
       for (int64_t I = 0, E = BB.size(); I < E; ++I)
-        Fn(MCInstInBBReference(&BB, I));
+        Fn(MCInstReference(&BB, I));
   } else {
-    for (auto I : BF.instrs())
-      Fn(MCInstInBFReference(&BF, I.first));
+    for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I)
+      Fn(MCInstReference(&BF, I));
   }
 }
 
@@ -566,7 +533,7 @@ class SrcSafetyAnalysis {
     std::vector<MCInstReference> Result;
     for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
       MCInstReference Ref = MCInstReference::get(Inst, BF);
-      assert(Ref && "Expected Inst to be found");
+      assert(!Ref.empty() && "Expected Inst to be found");
       Result.push_back(Ref);
     }
     return Result;
@@ -1138,7 +1105,7 @@ class DstSafetyAnalysis {
     std::vector<MCInstReference> Result;
     for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
       MCInstReference Ref = MCInstReference::get(Inst, BF);
-      assert(Ref && "Expected Inst to be found");
+      assert(!Ref.empty() && "Expected Inst to be found");
       Result.push_back(Ref);
     }
     return Result;
@@ -1345,8 +1312,7 @@ static bool shouldAnalyzeTailCallInst(const BinaryContext &BC,
   // (such as isBranch at the time of writing this comment), some don't (such
   // as isCall). For that reason, call MCInstrDesc's methods explicitly when
   // it is important.
-  const MCInstrDesc &Desc =
-      BC.MII->get(static_cast<const MCInst &>(Inst).getOpcode());
+  const MCInstrDesc &Desc = BC.MII->get(Inst.getMCInst().getOpcode());
   // Tail call should be a branch (but not necessarily an indirect one).
   if (!Desc.isBranch())
     return false;
@@ -1541,7 +1507,7 @@ void FunctionAnalysisContext::findUnsafeUses(
       // This is printed as "[message] in function [name], basic block ...,
       // at address ..." when the issue is reported to the user.
       Reports.push_back(make_generic_report(
-          MCInstReference::get(FirstInst, BF),
+          MCInstReference(&BB, FirstInst),
           "Warning: possibly imprecise CFG, the analysis quality may be "
           "degraded in this function. According to BOLT, unreachable code is "
           "found" /* in function [name]... */));
@@ -1705,31 +1671,49 @@ void Analysis::runOnFunction(BinaryFunction &BF,
   }
 }
 
+// Compute the instruction address for printing (may be slow).
+static uint64_t getAddress(const MCInstReference &Inst) {
+  const BinaryFunction *BF = Inst.getFunction();
+
+  if (Inst.hasCFG()) {
+    const BinaryBasicBlock *BB = Inst.getBasicBlock();
+
+    auto It = static_cast<BinaryBasicBlock::const_iterator>(&Inst.getMCInst());
+    unsigned IndexInBB = std::distance(BB->begin(), It);
+
+    // FIXME: this assumes all instructions are 4 bytes in size. This is true
+    // for AArch64, but it might be good to extract this function so it can be
+    // used elsewhere and for other targets too.
+    return BF->getAddress() + BB->getOffset() + IndexInBB * 4;
+  }
+
+  for (auto I = BF->instrs().begin(), E = BF->instrs().end(); I != E; ++I) {
+    if (&I->second == &Inst.getMCInst())
+      return BF->getAddress() + I->first;
+  }
+  llvm_unreachable("Instruction not found in function");
+}
+
 static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
                     size_t StartIndex = 0, size_t EndIndex = -1) {
   if (EndIndex == (size_t)-1)
     EndIndex = BB->size() - 1;
   const BinaryFunction *BF = BB->getFunction();
   for (unsigned I = StartIndex; I <= EndIndex; ++I) {
-    // FIXME: this assumes all instructions are 4 bytes in size. This is true
-    // for AArch64, but it might be good to extract this function so it can be
-    // used elsewhere and for other targets too.
-    uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I;
-    const MCInst &Inst = BB->getInstructionAtIndex(I);
+    MCInstReference Inst(BB, I);
     if (BC.MIB->isCFI(Inst))
       continue;
-    BC.printInstruction(outs(), Inst, Address, BF);
+    BC.printInstruction(outs(), Inst, getAddress(Inst), BF);
   }
 }
 
 static void reportFoundGadgetInSingleBBSingleRelatedInst(
     raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst,
     const MCInstReference Location) {
-  BinaryBasicBlock *BB = Location.getBasicBlock();
-  assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent);
-  assert(Location.ParentKind == MCInstReference::BasicBlockParent);
-  MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef;
-  if (BB == RelatedInstBB.BB) {
+  const BinaryBasicBlock *BB = Location.getBasicBlock();
+  assert(RelatedInst.hasCFG());
+  assert(Location.hasCFG());
+  if (BB == RelatedInst.getBasicBlock()) {
     OS << "  This happens in the following basic block:\n";
     printBB(BC, BB);
   }
@@ -1737,16 +1721,16 @@ static void reportFoundGadgetInSingleBBSingleRelatedInst(
 
 void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
                                 StringRef IssueKind) const {
-  BinaryFunction *BF = Location.getFunction();
-  BinaryBasicBlock *BB = Location.getBasicBlock();
+  const BinaryBasicBlock *BB = Location.getBasicBlock();
+  const BinaryFunction *BF = Location.getFunction();
 
   OS << "\nGS-PAUTH: " << IssueKind;
   OS << " in function " << BF->getPrintName();
   if (BB)
     OS << ", basic block " << BB->getName();
-  OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n";
+  OS << ", at address " << llvm::format("%x", getAddress(Location)) << "\n";
   OS << "  The instruction is ";
-  BC.printInstruction(OS, Location, Location.getAddress(), BF);
+  BC.printInstruction(OS, Location, getAddress(Location), BF);
 }
 
 void GadgetDiagnostic::generateReport(raw_ostream &OS,
@@ -1762,19 +1746,19 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
   // Sort by address to ensure output is deterministic.
   SmallVector<MCInstReference> RI(RelatedInstrs);
   llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) {
-    return A.getAddress() < B.getAddress();
+    return getAddress(A) < getAddress(B);
   });
   for (unsigned I = 0; I < RI.size(); ++I) {
     MCInstReference InstRef = RI[I];
     OS << "  " << (I + 1) << ". ";
-    BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF);
+    BC.printInstruction(OS, InstRef, getAddress(InstRef), &BF);
   };
   if (RelatedInstrs.size() == 1) {
     const MCInstReference RelatedInst = RelatedInstrs[0];
     // Printing the details for the MCInstReference::FunctionParent case
     // is not implemented not to overcomplicate the code, as most functions
     // are expected to have CFG information.
-    if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent)
+    if (RelatedInst.hasCFG())
       reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst,
                                                    Location);
   }
>From eb32eb8a1c3e6163d4efa606ce8bdd96dc73dd5a Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 28 Aug 2025 15:46:05 +0300
Subject: [PATCH 2/9] Move public members to the top of MCInstReference, make
 nocfg_const_iterator public
---
 bolt/include/bolt/Core/MCInstUtils.h | 121 ++++++++++++++-------------
 1 file changed, 61 insertions(+), 60 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 2e93dfaf4c275..82660573afaf3 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -22,68 +22,9 @@ class BinaryFunction;
 /// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
 /// (after a CFG is created).
 class MCInstReference {
+public:
   using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
 
-  // Two cases are possible:
-  // * functions with CFG reconstructed - a function stores a collection of
-  //   basic blocks, each basic block stores a contiguous vector of MCInst
-  // * functions without CFG - there are no basic blocks created,
-  //   the instructions are directly stored in std::map in BinaryFunction
-  //
-  // In both cases, the direct parent of MCInst is stored together with an
-  // iterator pointing to the instruction.
-
-  // Helper struct: CFG is available, the direct parent is a basic block,
-  // iterator's type is `MCInst *`.
-  struct RefInBB {
-    RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
-        : BB(BB), It(Inst) {}
-    RefInBB(const RefInBB &Other) = default;
-    RefInBB &operator=(const RefInBB &Other) = default;
-
-    const BinaryBasicBlock *BB;
-    BinaryBasicBlock::const_iterator It;
-
-    bool operator==(const RefInBB &Other) const {
-      return BB == Other.BB && It == Other.It;
-    }
-  };
-
-  // Helper struct: CFG is *not* available, the direct parent is a function,
-  // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
-  // is an instruction's offset).
-  struct RefInBF {
-    RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
-        : BF(BF), It(It) {}
-    RefInBF(const RefInBF &Other) = default;
-    RefInBF &operator=(const RefInBF &Other) = default;
-
-    const BinaryFunction *BF;
-    nocfg_const_iterator It;
-
-    bool operator==(const RefInBF &Other) const {
-      return BF == Other.BF && It->first == Other.It->first;
-    }
-  };
-
-  std::variant<RefInBB, RefInBF> Reference;
-
-  // Utility methods to be used like this:
-  //
-  //     if (auto *Ref = tryGetRefInBB())
-  //       return Ref->doSomething(...);
-  //     return getRefInBF().doSomethingElse(...);
-  const RefInBB *tryGetRefInBB() const {
-    assert(std::get_if<RefInBB>(&Reference) ||
-           std::get_if<RefInBF>(&Reference));
-    return std::get_if<RefInBB>(&Reference);
-  }
-  const RefInBF &getRefInBF() const {
-    assert(std::get_if<RefInBF>(&Reference));
-    return *std::get_if<RefInBF>(&Reference);
-  }
-
-public:
   /// Constructs an empty reference.
   MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
   /// Constructs a reference to the instruction inside the basic block.
@@ -142,6 +83,66 @@ class MCInstReference {
   }
 
   raw_ostream &print(raw_ostream &OS) const;
+
+private:
+  // Two cases are possible:
+  // * functions with CFG reconstructed - a function stores a collection of
+  //   basic blocks, each basic block stores a contiguous vector of MCInst
+  // * functions without CFG - there are no basic blocks created,
+  //   the instructions are directly stored in std::map in BinaryFunction
+  //
+  // In both cases, the direct parent of MCInst is stored together with an
+  // iterator pointing to the instruction.
+
+  // Helper struct: CFG is available, the direct parent is a basic block,
+  // iterator's type is `MCInst *`.
+  struct RefInBB {
+    RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
+        : BB(BB), It(Inst) {}
+    RefInBB(const RefInBB &Other) = default;
+    RefInBB &operator=(const RefInBB &Other) = default;
+
+    const BinaryBasicBlock *BB;
+    BinaryBasicBlock::const_iterator It;
+
+    bool operator==(const RefInBB &Other) const {
+      return BB == Other.BB && It == Other.It;
+    }
+  };
+
+  // Helper struct: CFG is *not* available, the direct parent is a function,
+  // iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
+  // is an instruction's offset).
+  struct RefInBF {
+    RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
+        : BF(BF), It(It) {}
+    RefInBF(const RefInBF &Other) = default;
+    RefInBF &operator=(const RefInBF &Other) = default;
+
+    const BinaryFunction *BF;
+    nocfg_const_iterator It;
+
+    bool operator==(const RefInBF &Other) const {
+      return BF == Other.BF && It->first == Other.It->first;
+    }
+  };
+
+  std::variant<RefInBB, RefInBF> Reference;
+
+  // Utility methods to be used like this:
+  //
+  //     if (auto *Ref = tryGetRefInBB())
+  //       return Ref->doSomething(...);
+  //     return getRefInBF().doSomethingElse(...);
+  const RefInBB *tryGetRefInBB() const {
+    assert(std::get_if<RefInBB>(&Reference) ||
+           std::get_if<RefInBF>(&Reference));
+    return std::get_if<RefInBB>(&Reference);
+  }
+  const RefInBF &getRefInBF() const {
+    assert(std::get_if<RefInBF>(&Reference));
+    return *std::get_if<RefInBF>(&Reference);
+  }
 };
 
 static inline raw_ostream &operator<<(raw_ostream &OS,
>From 3d8ac8c8cd8d5015b4f5b4a1dff6c7455e1401fd Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 28 Aug 2025 16:19:56 +0300
Subject: [PATCH 3/9] Fix iterating over the instructions of BinaryBasicBlock
---
 bolt/include/bolt/Core/MCInstUtils.h   | 15 ++++++--
 bolt/lib/Core/MCInstUtils.cpp          | 52 +++++++++++++++++++++++++-
 bolt/lib/Passes/PAuthGadgetScanner.cpp | 33 +++-------------
 3 files changed, 66 insertions(+), 34 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 82660573afaf3..a2797c6b31aa2 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -13,6 +13,10 @@
 #include <map>
 #include <variant>
 
+namespace llvm {
+class MCCodeEmitter;
+}
+
 namespace llvm {
 namespace bolt {
 
@@ -54,7 +58,7 @@ class MCInstReference {
   const MCInst &getMCInst() const {
     assert(!empty() && "Empty reference");
     if (auto *Ref = tryGetRefInBB())
-      return *Ref->It;
+      return *Ref->Inst;
     return getRefInBF().It->second;
   }
 
@@ -82,6 +86,9 @@ class MCInstReference {
     return nullptr;
   }
 
+  // MCCodeEmitter is not thread safe.
+  uint64_t getAddress(const MCCodeEmitter *Emitter = nullptr) const;
+
   raw_ostream &print(raw_ostream &OS) const;
 
 private:
@@ -98,15 +105,15 @@ class MCInstReference {
   // iterator's type is `MCInst *`.
   struct RefInBB {
     RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
-        : BB(BB), It(Inst) {}
+        : BB(BB), Inst(Inst) {}
     RefInBB(const RefInBB &Other) = default;
     RefInBB &operator=(const RefInBB &Other) = default;
 
     const BinaryBasicBlock *BB;
-    BinaryBasicBlock::const_iterator It;
+    const MCInst *Inst;
 
     bool operator==(const RefInBB &Other) const {
-      return BB == Other.BB && It == Other.It;
+      return BB == Other.BB && Inst == Other.Inst;
     }
   };
 
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 3cdb9673d4dc0..f721c259e9b28 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -9,12 +9,33 @@
 #include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryFunction.h"
+#include "llvm/ADT/iterator.h"
 
-#include <iterator>
+#include <type_traits>
 
 using namespace llvm;
 using namespace llvm::bolt;
 
+// It is assumed in a few places that BinaryBasicBlock stores its instructions
+// in a contiguous vector. Give this assumption a name to simplify marking the
+// particular places with static_assert.
+using BasicBlockStorageIsVector =
+    std::is_same<BinaryBasicBlock::const_iterator,
+                 std::vector<MCInst>::const_iterator>;
+
+namespace {
+// Cannot reuse MCPlusBuilder::InstructionIterator because it has to be
+// constructed from a non-const std::map iterator.
+class mapped_mcinst_iterator
+    : public iterator_adaptor_base<mapped_mcinst_iterator,
+                                   MCInstReference::nocfg_const_iterator> {
+public:
+  mapped_mcinst_iterator(MCInstReference::nocfg_const_iterator It)
+      : iterator_adaptor_base(It) {}
+  const MCInst &operator*() const { return this->I->second; }
+};
+} // anonymous namespace
+
 MCInstReference MCInstReference::get(const MCInst *Inst,
                                      const BinaryFunction &BF) {
   if (BF.hasCFG()) {
@@ -32,13 +53,40 @@ MCInstReference MCInstReference::get(const MCInst *Inst,
   return {};
 }
 
+uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
+  assert(!empty() && "Taking instruction address by empty reference");
+
+  const BinaryContext &BC = getFunction()->getBinaryContext();
+  if (auto *Ref = tryGetRefInBB()) {
+    static_assert(BasicBlockStorageIsVector::value,
+                  "Cannot use 'const MCInst *' as iterator type");
+    uint64_t AddressOfBB = getFunction()->getAddress() + Ref->BB->getOffset();
+    const MCInst *FirstInstInBB = &*Ref->BB->begin();
+
+    uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, Ref->Inst, Emitter);
+
+    return AddressOfBB + OffsetInBB;
+  }
+
+  auto &Ref = getRefInBF();
+  mapped_mcinst_iterator FirstInstInBF(Ref.BF->instrs().begin());
+  mapped_mcinst_iterator ThisInst(Ref.It);
+
+  uint64_t OffsetInBF = BC.computeCodeSize(FirstInstInBF, ThisInst, Emitter);
+
+  return getFunction()->getAddress() + OffsetInBF;
+}
+
 raw_ostream &MCInstReference::print(raw_ostream &OS) const {
   if (const RefInBB *Ref = tryGetRefInBB()) {
     OS << "MCInstBBRef<";
     if (Ref->BB == nullptr) {
       OS << "BB:(null)";
     } else {
-      unsigned IndexInBB = std::distance(Ref->BB->begin(), Ref->It);
+      static_assert(BasicBlockStorageIsVector::value,
+                    "Cannot use pointer arithmetic on 'const MCInst *'");
+      const MCInst *FirstInstInBB = &*Ref->BB->begin();
+      unsigned IndexInBB = Ref->Inst - FirstInstInBB;
       OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
     }
     OS << ">";
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 5d884e2d37354..287cbe31df3af 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1671,29 +1671,6 @@ void Analysis::runOnFunction(BinaryFunction &BF,
   }
 }
 
-// Compute the instruction address for printing (may be slow).
-static uint64_t getAddress(const MCInstReference &Inst) {
-  const BinaryFunction *BF = Inst.getFunction();
-
-  if (Inst.hasCFG()) {
-    const BinaryBasicBlock *BB = Inst.getBasicBlock();
-
-    auto It = static_cast<BinaryBasicBlock::const_iterator>(&Inst.getMCInst());
-    unsigned IndexInBB = std::distance(BB->begin(), It);
-
-    // FIXME: this assumes all instructions are 4 bytes in size. This is true
-    // for AArch64, but it might be good to extract this function so it can be
-    // used elsewhere and for other targets too.
-    return BF->getAddress() + BB->getOffset() + IndexInBB * 4;
-  }
-
-  for (auto I = BF->instrs().begin(), E = BF->instrs().end(); I != E; ++I) {
-    if (&I->second == &Inst.getMCInst())
-      return BF->getAddress() + I->first;
-  }
-  llvm_unreachable("Instruction not found in function");
-}
-
 static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
                     size_t StartIndex = 0, size_t EndIndex = -1) {
   if (EndIndex == (size_t)-1)
@@ -1703,7 +1680,7 @@ static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
     MCInstReference Inst(BB, I);
     if (BC.MIB->isCFI(Inst))
       continue;
-    BC.printInstruction(outs(), Inst, getAddress(Inst), BF);
+    BC.printInstruction(outs(), Inst, Inst.getAddress(), BF);
   }
 }
 
@@ -1728,9 +1705,9 @@ void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
   OS << " in function " << BF->getPrintName();
   if (BB)
     OS << ", basic block " << BB->getName();
-  OS << ", at address " << llvm::format("%x", getAddress(Location)) << "\n";
+  OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n";
   OS << "  The instruction is ";
-  BC.printInstruction(OS, Location, getAddress(Location), BF);
+  BC.printInstruction(OS, Location, Location.getAddress(), BF);
 }
 
 void GadgetDiagnostic::generateReport(raw_ostream &OS,
@@ -1746,12 +1723,12 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
   // Sort by address to ensure output is deterministic.
   SmallVector<MCInstReference> RI(RelatedInstrs);
   llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) {
-    return getAddress(A) < getAddress(B);
+    return A.getAddress() < B.getAddress();
   });
   for (unsigned I = 0; I < RI.size(); ++I) {
     MCInstReference InstRef = RI[I];
     OS << "  " << (I + 1) << ". ";
-    BC.printInstruction(OS, InstRef, getAddress(InstRef), &BF);
+    BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF);
   };
   if (RelatedInstrs.size() == 1) {
     const MCInstReference RelatedInst = RelatedInstrs[0];
>From 2a25f3ac711f318195f92335333da3a2ac183421 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 17 Sep 2025 22:29:14 +0300
Subject: [PATCH 4/9] Use Index inside the BB instead of direct MCInst* in
 RefInBB
---
 bolt/include/bolt/Core/MCInstUtils.h | 40 ++++++++++++++++++----------
 bolt/lib/Core/MCInstUtils.cpp        | 22 +++++++--------
 2 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index a2797c6b31aa2..bd3b3016f706f 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -25,20 +25,21 @@ class BinaryFunction;
 /// MCInstReference represents a reference to a constant MCInst as stored either
 /// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
 /// (after a CFG is created).
+///
+/// The reference may be invalidated when the function containing the referenced
+/// instruction is modified.
 class MCInstReference {
 public:
   using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;
 
   /// Constructs an empty reference.
-  MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {}
+  MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {}
   /// Constructs a reference to the instruction inside the basic block.
   MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
-      : Reference(RefInBB(BB, Inst)) {
-    assert(BB && Inst && "Neither BB nor Inst should be nullptr");
-  }
+      : Reference(RefInBB(BB, getInstIndexInBB(BB, Inst))) {}
   /// Constructs a reference to the instruction inside the basic block.
   MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
-      : Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) {
+      : Reference(RefInBB(BB, Index)) {
     assert(BB && "Basic block should not be nullptr");
   }
   /// Constructs a reference to the instruction inside the function without
@@ -57,8 +58,11 @@ class MCInstReference {
 
   const MCInst &getMCInst() const {
     assert(!empty() && "Empty reference");
-    if (auto *Ref = tryGetRefInBB())
-      return *Ref->Inst;
+    if (auto *Ref = tryGetRefInBB()) {
+      [[maybe_unused]] unsigned NumInstructions = Ref->BB->size();
+      assert(Ref->Index < NumInstructions && "Invalid reference");
+      return Ref->BB->getInstructionAtIndex(Ref->Index);
+    }
     return getRefInBF().It->second;
   }
 
@@ -92,6 +96,15 @@ class MCInstReference {
   raw_ostream &print(raw_ostream &OS) const;
 
 private:
+  static unsigned getInstIndexInBB(const BinaryBasicBlock *BB,
+                                   const MCInst *Inst) {
+    assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+    // Usage of pointer arithmetic assumes the instructions are stored in a
+    // vector, see BasicBlockStorageIsVector in MCInstUtils.cpp.
+    const MCInst *FirstInstInBB = &*BB->begin();
+    return Inst - FirstInstInBB;
+  }
+
   // Two cases are possible:
   // * functions with CFG reconstructed - a function stores a collection of
   //   basic blocks, each basic block stores a contiguous vector of MCInst
@@ -99,21 +112,20 @@ class MCInstReference {
   //   the instructions are directly stored in std::map in BinaryFunction
   //
   // In both cases, the direct parent of MCInst is stored together with an
-  // iterator pointing to the instruction.
+  // index or iterator pointing to the instruction.
 
-  // Helper struct: CFG is available, the direct parent is a basic block,
-  // iterator's type is `MCInst *`.
+  // Helper struct: CFG is available, the direct parent is a basic block.
   struct RefInBB {
-    RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst)
-        : BB(BB), Inst(Inst) {}
+    RefInBB(const BinaryBasicBlock *BB, unsigned Index)
+        : BB(BB), Index(Index) {}
     RefInBB(const RefInBB &Other) = default;
     RefInBB &operator=(const RefInBB &Other) = default;
 
     const BinaryBasicBlock *BB;
-    const MCInst *Inst;
+    unsigned Index;
 
     bool operator==(const RefInBB &Other) const {
-      return BB == Other.BB && Inst == Other.Inst;
+      return BB == Other.BB && Index == Other.Index;
     }
   };
 
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index f721c259e9b28..2bd07882912f2 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -17,11 +17,11 @@ using namespace llvm;
 using namespace llvm::bolt;
 
 // It is assumed in a few places that BinaryBasicBlock stores its instructions
-// in a contiguous vector. Give this assumption a name to simplify marking the
-// particular places with static_assert.
+// in a contiguous vector.
 using BasicBlockStorageIsVector =
     std::is_same<BinaryBasicBlock::const_iterator,
                  std::vector<MCInst>::const_iterator>;
+static_assert(BasicBlockStorageIsVector::value);
 
 namespace {
 // Cannot reuse MCPlusBuilder::InstructionIterator because it has to be
@@ -58,12 +58,13 @@ uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
 
   const BinaryContext &BC = getFunction()->getBinaryContext();
   if (auto *Ref = tryGetRefInBB()) {
-    static_assert(BasicBlockStorageIsVector::value,
-                  "Cannot use 'const MCInst *' as iterator type");
     uint64_t AddressOfBB = getFunction()->getAddress() + Ref->BB->getOffset();
     const MCInst *FirstInstInBB = &*Ref->BB->begin();
+    const MCInst *ThisInst = &getMCInst();
 
-    uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, Ref->Inst, Emitter);
+    // Usage of plain 'const MCInst *' as iterators assumes the instructions
+    // are stored in a vector, see BasicBlockStorageIsVector.
+    uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, ThisInst, Emitter);
 
     return AddressOfBB + OffsetInBB;
   }
@@ -80,15 +81,10 @@ uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
 raw_ostream &MCInstReference::print(raw_ostream &OS) const {
   if (const RefInBB *Ref = tryGetRefInBB()) {
     OS << "MCInstBBRef<";
-    if (Ref->BB == nullptr) {
+    if (Ref->BB == nullptr)
       OS << "BB:(null)";
-    } else {
-      static_assert(BasicBlockStorageIsVector::value,
-                    "Cannot use pointer arithmetic on 'const MCInst *'");
-      const MCInst *FirstInstInBB = &*Ref->BB->begin();
-      unsigned IndexInBB = Ref->Inst - FirstInstInBB;
-      OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB;
-    }
+    else
+      OS << "BB:" << Ref->BB->getName() << ":" << Ref->Index;
     OS << ">";
     return OS;
   }
>From b06809883ed96d1c00793834bd05bcff51f032d9 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 14:40:36 +0300
Subject: [PATCH 5/9] Rename getAddress to computeAddress, add a comment
---
 bolt/include/bolt/Core/MCInstUtils.h   | 13 +++++++++++--
 bolt/lib/Core/MCInstUtils.cpp          |  2 +-
 bolt/lib/Passes/PAuthGadgetScanner.cpp | 21 ++++++++++++---------
 3 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index bd3b3016f706f..4226437463283 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -90,8 +90,17 @@ class MCInstReference {
     return nullptr;
   }
 
-  // MCCodeEmitter is not thread safe.
-  uint64_t getAddress(const MCCodeEmitter *Emitter = nullptr) const;
+  /// Computes the address of the instruction (or offset from base for PIC).
+  ///
+  /// This function is intended for the use cases like debug printing, as it
+  /// is only as precise as BinaryContext::computeCodeSize() is and requires
+  /// iterating over the prefix of the basic block (when CFG is available) or
+  /// of the function (when CFG is unavailable).
+  ///
+  /// MCCodeEmitter is not thread safe and the default instance from
+  /// BinaryContext is used by default, thus pass an instance explicitly if
+  /// this function may be called from multithreaded code.
+  uint64_t computeAddress(const MCCodeEmitter *Emitter = nullptr) const;
 
   raw_ostream &print(raw_ostream &OS) const;
 
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 2bd07882912f2..bc17d1c306122 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -53,7 +53,7 @@ MCInstReference MCInstReference::get(const MCInst *Inst,
   return {};
 }
 
-uint64_t MCInstReference::getAddress(const MCCodeEmitter *Emitter) const {
+uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const {
   assert(!empty() && "Taking instruction address by empty reference");
 
   const BinaryContext &BC = getFunction()->getBinaryContext();
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 287cbe31df3af..d29c817407eae 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -1680,7 +1680,7 @@ static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
     MCInstReference Inst(BB, I);
     if (BC.MIB->isCFI(Inst))
       continue;
-    BC.printInstruction(outs(), Inst, Inst.getAddress(), BF);
+    BC.printInstruction(outs(), Inst, Inst.computeAddress(), BF);
   }
 }
 
@@ -1700,14 +1700,15 @@ void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
                                 StringRef IssueKind) const {
   const BinaryBasicBlock *BB = Location.getBasicBlock();
   const BinaryFunction *BF = Location.getFunction();
+  const uint64_t Address = Location.computeAddress();
 
   OS << "\nGS-PAUTH: " << IssueKind;
   OS << " in function " << BF->getPrintName();
   if (BB)
     OS << ", basic block " << BB->getName();
-  OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n";
+  OS << ", at address " << llvm::format("%x", Address) << "\n";
   OS << "  The instruction is ";
-  BC.printInstruction(OS, Location, Location.getAddress(), BF);
+  BC.printInstruction(OS, Location, Address, BF);
 }
 
 void GadgetDiagnostic::generateReport(raw_ostream &OS,
@@ -1721,15 +1722,17 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location,
   const BinaryContext &BC = BF.getBinaryContext();
 
   // Sort by address to ensure output is deterministic.
-  SmallVector<MCInstReference> RI(RelatedInstrs);
-  llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) {
-    return A.getAddress() < B.getAddress();
-  });
+  SmallVector<std::pair<uint64_t, MCInstReference>> RI;
+  for (auto &InstRef : RelatedInstrs)
+    RI.push_back(std::make_pair(InstRef.computeAddress(), InstRef));
+  llvm::sort(RI, [](auto A, auto B) { return A.first < B.first; });
+
   for (unsigned I = 0; I < RI.size(); ++I) {
-    MCInstReference InstRef = RI[I];
+    auto [Address, InstRef] = RI[I];
     OS << "  " << (I + 1) << ". ";
-    BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF);
+    BC.printInstruction(OS, InstRef, InstRef.computeAddress(), &BF);
   };
+
   if (RelatedInstrs.size() == 1) {
     const MCInstReference RelatedInst = RelatedInstrs[0];
     // Printing the details for the MCInstReference::FunctionParent case
>From 0558f1b3892087c43d8d2659cddc9aca6f3ef479 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 14:51:04 +0300
Subject: [PATCH 6/9] Use references instead of pointers expected to be
 non-NULL
---
 bolt/include/bolt/Core/MCInstUtils.h   | 29 ++++++++++++--------------
 bolt/lib/Core/MCInstUtils.cpp          | 10 ++++-----
 bolt/lib/Passes/PAuthGadgetScanner.cpp | 18 ++++++++--------
 3 files changed, 27 insertions(+), 30 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 4226437463283..21ed2061a9edc 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -34,23 +34,21 @@ class MCInstReference {
 
   /// Constructs an empty reference.
   MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {}
+
   /// Constructs a reference to the instruction inside the basic block.
-  MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst)
-      : Reference(RefInBB(BB, getInstIndexInBB(BB, Inst))) {}
+  MCInstReference(const BinaryBasicBlock &BB, const MCInst &Inst)
+      : Reference(RefInBB(&BB, getInstIndexInBB(BB, Inst))) {}
   /// Constructs a reference to the instruction inside the basic block.
-  MCInstReference(const BinaryBasicBlock *BB, unsigned Index)
-      : Reference(RefInBB(BB, Index)) {
-    assert(BB && "Basic block should not be nullptr");
-  }
+  MCInstReference(const BinaryBasicBlock &BB, unsigned Index)
+      : Reference(RefInBB(&BB, Index)) {}
+
   /// Constructs a reference to the instruction inside the function without
   /// CFG information.
-  MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It)
-      : Reference(RefInBF(BF, It)) {
-    assert(BF && "Function should not be nullptr");
-  }
+  MCInstReference(const BinaryFunction &BF, nocfg_const_iterator It)
+      : Reference(RefInBF(&BF, It)) {}
 
   /// Locates an instruction inside a function and returns a reference.
-  static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF);
+  static MCInstReference get(const MCInst &Inst, const BinaryFunction &BF);
 
   bool operator==(const MCInstReference &Other) const {
     return Reference == Other.Reference;
@@ -105,13 +103,12 @@ class MCInstReference {
   raw_ostream &print(raw_ostream &OS) const;
 
 private:
-  static unsigned getInstIndexInBB(const BinaryBasicBlock *BB,
-                                   const MCInst *Inst) {
-    assert(BB && Inst && "Neither BB nor Inst should be nullptr");
+  static unsigned getInstIndexInBB(const BinaryBasicBlock &BB,
+                                   const MCInst &Inst) {
     // Usage of pointer arithmetic assumes the instructions are stored in a
     // vector, see BasicBlockStorageIsVector in MCInstUtils.cpp.
-    const MCInst *FirstInstInBB = &*BB->begin();
-    return Inst - FirstInstInBB;
+    const MCInst *FirstInstInBB = &*BB.begin();
+    return &Inst - FirstInstInBB;
   }
 
   // Two cases are possible:
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index bc17d1c306122..6742dd4110775 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -36,19 +36,19 @@ class mapped_mcinst_iterator
 };
 } // anonymous namespace
 
-MCInstReference MCInstReference::get(const MCInst *Inst,
+MCInstReference MCInstReference::get(const MCInst &Inst,
                                      const BinaryFunction &BF) {
   if (BF.hasCFG()) {
     for (BinaryBasicBlock &BB : BF)
       for (MCInst &MI : BB)
-        if (&MI == Inst)
-          return MCInstReference(&BB, Inst);
+        if (&MI == &Inst)
+          return MCInstReference(BB, Inst);
     return {};
   }
 
   for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
-    if (&I->second == Inst)
-      return MCInstReference(&BF, I);
+    if (&I->second == &Inst)
+      return MCInstReference(BF, I);
   }
   return {};
 }
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index d29c817407eae..e9520a9819939 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -58,10 +58,10 @@ template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
   if (BF.hasCFG()) {
     for (BinaryBasicBlock &BB : BF)
       for (int64_t I = 0, E = BB.size(); I < E; ++I)
-        Fn(MCInstReference(&BB, I));
+        Fn(MCInstReference(BB, I));
   } else {
     for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I)
-      Fn(MCInstReference(&BF, I));
+      Fn(MCInstReference(BF, I));
   }
 }
 
@@ -532,7 +532,7 @@ class SrcSafetyAnalysis {
 
     std::vector<MCInstReference> Result;
     for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
-      MCInstReference Ref = MCInstReference::get(Inst, BF);
+      MCInstReference Ref = MCInstReference::get(*Inst, BF);
       assert(!Ref.empty() && "Expected Inst to be found");
       Result.push_back(Ref);
     }
@@ -1104,7 +1104,7 @@ class DstSafetyAnalysis {
 
     std::vector<MCInstReference> Result;
     for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
-      MCInstReference Ref = MCInstReference::get(Inst, BF);
+      MCInstReference Ref = MCInstReference::get(*Inst, BF);
       assert(!Ref.empty() && "Expected Inst to be found");
       Result.push_back(Ref);
     }
@@ -1507,7 +1507,7 @@ void FunctionAnalysisContext::findUnsafeUses(
       // This is printed as "[message] in function [name], basic block ...,
       // at address ..." when the issue is reported to the user.
       Reports.push_back(make_generic_report(
-          MCInstReference(&BB, FirstInst),
+          MCInstReference(BB, *FirstInst),
           "Warning: possibly imprecise CFG, the analysis quality may be "
           "degraded in this function. According to BOLT, unreachable code is "
           "found" /* in function [name]... */));
@@ -1671,11 +1671,11 @@ void Analysis::runOnFunction(BinaryFunction &BF,
   }
 }
 
-static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB,
+static void printBB(const BinaryContext &BC, const BinaryBasicBlock &BB,
                     size_t StartIndex = 0, size_t EndIndex = -1) {
   if (EndIndex == (size_t)-1)
-    EndIndex = BB->size() - 1;
-  const BinaryFunction *BF = BB->getFunction();
+    EndIndex = BB.size() - 1;
+  const BinaryFunction *BF = BB.getFunction();
   for (unsigned I = StartIndex; I <= EndIndex; ++I) {
     MCInstReference Inst(BB, I);
     if (BC.MIB->isCFI(Inst))
@@ -1692,7 +1692,7 @@ static void reportFoundGadgetInSingleBBSingleRelatedInst(
   assert(Location.hasCFG());
   if (BB == RelatedInst.getBasicBlock()) {
     OS << "  This happens in the following basic block:\n";
-    printBB(BC, BB);
+    printBB(BC, *BB);
   }
 }
 
>From 5a7414cf1b7ce5883af02cd6ab886614ed84f5c7 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 23 Sep 2025 15:54:34 +0300
Subject: [PATCH 7/9] Never return an empty reference from MCInstReference::get
---
 bolt/lib/Core/MCInstUtils.cpp          |  7 ++++---
 bolt/lib/Passes/PAuthGadgetScanner.cpp | 14 ++++----------
 2 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 6742dd4110775..6f6b9ef638424 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -39,18 +39,19 @@ class mapped_mcinst_iterator
 MCInstReference MCInstReference::get(const MCInst &Inst,
                                      const BinaryFunction &BF) {
   if (BF.hasCFG()) {
-    for (BinaryBasicBlock &BB : BF)
+    for (BinaryBasicBlock &BB : BF) {
       for (MCInst &MI : BB)
         if (&MI == &Inst)
           return MCInstReference(BB, Inst);
-    return {};
+    }
+    llvm_unreachable("Inst is not contained in BF");
   }
 
   for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) {
     if (&I->second == &Inst)
       return MCInstReference(BF, I);
   }
-  return {};
+  llvm_unreachable("Inst is not contained in BF");
 }
 
 uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index e9520a9819939..bd990d8201679 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -531,11 +531,8 @@ class SrcSafetyAnalysis {
     const SrcState &S = getStateBefore(Inst);
 
     std::vector<MCInstReference> Result;
-    for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) {
-      MCInstReference Ref = MCInstReference::get(*Inst, BF);
-      assert(!Ref.empty() && "Expected Inst to be found");
-      Result.push_back(Ref);
-    }
+    for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg))
+      Result.push_back(MCInstReference::get(*Inst, BF));
     return Result;
   }
 };
@@ -1103,11 +1100,8 @@ class DstSafetyAnalysis {
     const DstState &S = getStateAfter(Inst);
 
     std::vector<MCInstReference> Result;
-    for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) {
-      MCInstReference Ref = MCInstReference::get(*Inst, BF);
-      assert(!Ref.empty() && "Expected Inst to be found");
-      Result.push_back(Ref);
-    }
+    for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg))
+      Result.push_back(MCInstReference::get(*Inst, BF));
     return Result;
   }
 };
>From ec6bff9d37a2dba56bd38833b3d4c8be2b27f4e0 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Fri, 26 Sep 2025 18:55:01 +0300
Subject: [PATCH 8/9] Clarify the description of computeAddress()
---
 bolt/include/bolt/Core/MCInstUtils.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 21ed2061a9edc..20682b3f57a13 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -88,7 +88,8 @@ class MCInstReference {
     return nullptr;
   }
 
-  /// Computes the address of the instruction (or offset from base for PIC).
+  /// Computes the original address of the instruction (or offset from base
+  /// for PIC), assuming the containing function was not modified.
   ///
   /// This function is intended for the use cases like debug printing, as it
   /// is only as precise as BinaryContext::computeCodeSize() is and requires
>From 2fc734ce57ead327840a5d62a7980e5eaa70eac3 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Mon, 29 Sep 2025 15:00:48 +0300
Subject: [PATCH 9/9] Obtain offset from std::map key when CFG is unavailable
---
 bolt/include/bolt/Core/MCInstUtils.h |  3 +--
 bolt/lib/Core/MCInstUtils.cpp        | 19 +------------------
 2 files changed, 2 insertions(+), 20 deletions(-)
diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h
index 20682b3f57a13..eb56629c61c7d 100644
--- a/bolt/include/bolt/Core/MCInstUtils.h
+++ b/bolt/include/bolt/Core/MCInstUtils.h
@@ -93,8 +93,7 @@ class MCInstReference {
   ///
   /// This function is intended for the use cases like debug printing, as it
   /// is only as precise as BinaryContext::computeCodeSize() is and requires
-  /// iterating over the prefix of the basic block (when CFG is available) or
-  /// of the function (when CFG is unavailable).
+  /// iterating over the prefix of the basic block (when CFG is available).
   ///
   /// MCCodeEmitter is not thread safe and the default instance from
   /// BinaryContext is used by default, thus pass an instance explicitly if
diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp
index 6f6b9ef638424..472a5c6bf956b 100644
--- a/bolt/lib/Core/MCInstUtils.cpp
+++ b/bolt/lib/Core/MCInstUtils.cpp
@@ -9,7 +9,6 @@
 #include "bolt/Core/MCInstUtils.h"
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryFunction.h"
-#include "llvm/ADT/iterator.h"
 
 #include <type_traits>
 
@@ -23,19 +22,6 @@ using BasicBlockStorageIsVector =
                  std::vector<MCInst>::const_iterator>;
 static_assert(BasicBlockStorageIsVector::value);
 
-namespace {
-// Cannot reuse MCPlusBuilder::InstructionIterator because it has to be
-// constructed from a non-const std::map iterator.
-class mapped_mcinst_iterator
-    : public iterator_adaptor_base<mapped_mcinst_iterator,
-                                   MCInstReference::nocfg_const_iterator> {
-public:
-  mapped_mcinst_iterator(MCInstReference::nocfg_const_iterator It)
-      : iterator_adaptor_base(It) {}
-  const MCInst &operator*() const { return this->I->second; }
-};
-} // anonymous namespace
-
 MCInstReference MCInstReference::get(const MCInst &Inst,
                                      const BinaryFunction &BF) {
   if (BF.hasCFG()) {
@@ -71,10 +57,7 @@ uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const {
   }
 
   auto &Ref = getRefInBF();
-  mapped_mcinst_iterator FirstInstInBF(Ref.BF->instrs().begin());
-  mapped_mcinst_iterator ThisInst(Ref.It);
-
-  uint64_t OffsetInBF = BC.computeCodeSize(FirstInstInBF, ThisInst, Emitter);
+  uint64_t OffsetInBF = Ref.It->first;
 
   return getFunction()->getAddress() + OffsetInBF;
 }
    
    
More information about the llvm-commits
mailing list