[llvm] [BOLT] Refactor MCInstReference and move it to Core (NFC) (PR #155846)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 26 11:47:41 PDT 2025


================
@@ -0,0 +1,101 @@
+//===- bolt/Core/MCInstUtils.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/MCInstUtils.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "llvm/ADT/iterator.h"
+
+#include <type_traits>
+
+using namespace llvm;
+using namespace llvm::bolt;
+
+// Code in this file uses plain 'const MCInst *' values as iterators over the
+// instructions of a BinaryBasicBlock. Fail the build if the block's iterator
+// type stops being that of a std::vector<MCInst>.
+using BasicBlockStorageIsVector =
+    std::is_same<std::vector<MCInst>::const_iterator,
+                 BinaryBasicBlock::const_iterator>;
+static_assert(BasicBlockStorageIsVector::value);
+
+namespace {
+/// Adapts MCInstReference::nocfg_const_iterator so that dereferencing yields
+/// the mapped MCInst (the `second` member of the pointed-to entry).
+///
+/// MCPlusBuilder::InstructionIterator cannot be reused here because it has to
+/// be constructed from a non-const std::map iterator.
+class mapped_mcinst_iterator
+    : public iterator_adaptor_base<mapped_mcinst_iterator,
+                                   MCInstReference::nocfg_const_iterator> {
+  using WrappedIt = MCInstReference::nocfg_const_iterator;
+
+public:
+  /// Wraps \p Pos; intentionally implicit.
+  mapped_mcinst_iterator(WrappedIt Pos) : iterator_adaptor_base(Pos) {}
+
+  /// Returns the instruction stored in the entry the wrapped iterator
+  /// currently points to.
+  const MCInst &operator*() const {
+    const auto &Entry = *this->I;
+    return Entry.second;
+  }
+};
+} // anonymous namespace
+
+/// Builds a reference to \p Inst by locating it, by address, inside \p BF.
+///
+/// If \p BF has a CFG, its basic blocks are scanned and the reference is
+/// created from the containing block; otherwise the BF.instrs() container is
+/// scanned and the reference is created from the matching iterator. It is a
+/// fatal error (llvm_unreachable) for \p Inst not to be found.
+MCInstReference MCInstReference::get(const MCInst &Inst,
+                                     const BinaryFunction &BF) {
+  // No CFG: walk the function-level instruction container.
+  if (!BF.hasCFG()) {
+    auto Pos = BF.instrs().begin();
+    const auto End = BF.instrs().end();
+    while (Pos != End) {
+      if (&Pos->second == &Inst)
+        return MCInstReference(BF, Pos);
+      ++Pos;
+    }
+    llvm_unreachable("Inst is not contained in BF");
+  }
+
+  // CFG is available: compare addresses, as the caller's MCInst must be the
+  // very object stored in one of the basic blocks.
+  for (BinaryBasicBlock &Block : BF)
+    for (MCInst &Candidate : Block)
+      if (&Candidate == &Inst)
+        return MCInstReference(Block, Inst);
+  llvm_unreachable("Inst is not contained in BF");
+}
+
+uint64_t MCInstReference::computeAddress(const MCCodeEmitter *Emitter) const {
+  assert(!empty() && "Taking instruction address by empty reference");
+
+  const BinaryContext &BC = getFunction()->getBinaryContext();
+  if (auto *Ref = tryGetRefInBB()) {
+    uint64_t AddressOfBB = getFunction()->getAddress() + Ref->BB->getOffset();
+    const MCInst *FirstInstInBB = &*Ref->BB->begin();
+    const MCInst *ThisInst = &getMCInst();
+
+    // Usage of plain 'const MCInst *' as iterators assumes the instructions
+    // are stored in a vector, see BasicBlockStorageIsVector.
+    uint64_t OffsetInBB = BC.computeCodeSize(FirstInstInBB, ThisInst, Emitter);
+
+    return AddressOfBB + OffsetInBB;
+  }
+
+  auto &Ref = getRefInBF();
+  mapped_mcinst_iterator FirstInstInBF(Ref.BF->instrs().begin());
+  mapped_mcinst_iterator ThisInst(Ref.It);
+
+  uint64_t OffsetInBF = BC.computeCodeSize(FirstInstInBF, ThisInst, Emitter);
----------------
maksfb wrote:

Without the CFG, the offset of the instruction is stored as a key in the `BF.instrs()` map. There's no need to recompute it.

With this comment addressed, the PR should be ready to go.

https://github.com/llvm/llvm-project/pull/155846


More information about the llvm-commits mailing list