[llvm] [BOLT][binary-analysis] Add initial pac-ret gadget scanner (PR #122304)

Kristof Beyls via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 15 05:55:02 PST 2025


================
@@ -0,0 +1,266 @@
+//===- bolt/Passes/NonPacProtectedRetAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_NONPACPROTECTEDRETANALYSIS_H
+#define BOLT_PASSES_NONPACPROTECTEDRETANALYSIS_H
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Passes/BinaryPasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Annotations/Annotations.h"
+#include <memory>
+
+namespace llvm {
+namespace bolt {
+
+/// @brief  MCInstReference represents a reference to an MCInst as stored either
+/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
+/// (after a CFG is created). It aims to store the necessary information to be
+/// able to find the specific MCInst in either the BinaryFunction or
+/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
+/// the corresponding instruction can be computed.
+
+struct MCInstInBBReference {
+  BinaryBasicBlock *BB;
+  int64_t BBIndex;
+  MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
+      : BB(BB), BBIndex(BBIndex) {}
+  MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
+  static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
+    for (BinaryBasicBlock &BB : BF)
+      for (size_t I = 0; I < BB.size(); ++I)
+        if (Inst == &(BB.getInstructionAtIndex(I)))
+          return MCInstInBBReference(&BB, I);
+    return {};
+  }
+  bool operator==(const MCInstInBBReference &RHS) const {
+    return BB == RHS.BB && BBIndex == RHS.BBIndex;
+  }
+  bool operator<(const MCInstInBBReference &RHS) const {
+    if (BB != RHS.BB)
+      return BB < RHS.BB;
+    return BBIndex < RHS.BBIndex;
+  }
+  operator MCInst &() const {
+    assert(BB != nullptr);
+    return BB->getInstructionAtIndex(BBIndex);
+  }
+  uint64_t getAddress() const {
+    // 4 bytes per instruction on AArch64;
+    // FIXME: the assumption of 4 byte per instruction needs to be fixed before
+    // this method gets used on any non-AArch64 binaries (but should be fine for
+    // pac-ret analysis, as that is an AArch64-specific feature).
+    return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
+  }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);
+
+struct MCInstInBFReference {
+  BinaryFunction *BF;
+  uint32_t Offset;
+  MCInstInBFReference(BinaryFunction *BF, uint32_t Offset)
+      : BF(BF), Offset(Offset) {}
+  MCInstInBFReference() : BF(nullptr) {}
+  bool operator==(const MCInstInBFReference &RHS) const {
+    return BF == RHS.BF && Offset == RHS.Offset;
+  }
+  bool operator<(const MCInstInBFReference &RHS) const {
+    if (BF != RHS.BF)
+      return BF < RHS.BF;
+    return Offset < RHS.Offset;
+  }
+  operator MCInst &() const {
+    assert(BF != nullptr);
+    return *(BF->getInstructionAtOffset(Offset));
+  }
+
+  uint64_t getOffset() const { return Offset; }
+
+  uint64_t getAddress() const { return BF->getAddress() + getOffset(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);
+
+struct MCInstReference {
+  enum Kind { FunctionParent, BasicBlockParent };
+  Kind ParentKind;
+  union U {
+    MCInstInBBReference BBRef;
+    MCInstInBFReference BFRef;
+    U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
+    U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
+  } U;
+  MCInstReference(MCInstInBBReference BBRef)
+      : ParentKind(BasicBlockParent), U(BBRef) {}
+  MCInstReference(MCInstInBFReference BFRef)
+      : ParentKind(FunctionParent), U(BFRef) {}
+  MCInstReference(class BinaryBasicBlock *BB, int64_t BBIndex)
+      : MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
+  MCInstReference(class BinaryFunction *BF, uint32_t Offset)
+      : MCInstReference(MCInstInBFReference(BF, Offset)) {}
+
+  bool operator<(const MCInstReference &RHS) const {
+    if (ParentKind != RHS.ParentKind)
+      return ParentKind < RHS.ParentKind;
+    switch (ParentKind) {
+    case BasicBlockParent:
+      return U.BBRef < RHS.U.BBRef;
+    case FunctionParent:
+      return U.BFRef < RHS.U.BFRef;
+    }
+    llvm_unreachable("");
+  }
+
+  bool operator==(const MCInstReference &RHS) const {
+    if (ParentKind != RHS.ParentKind)
+      return false;
+    switch (ParentKind) {
+    case BasicBlockParent:
+      return U.BBRef == RHS.U.BBRef;
+    case FunctionParent:
+      return U.BFRef == RHS.U.BFRef;
+    }
+    llvm_unreachable("");
+  }
+
+  operator MCInst &() const {
+    switch (ParentKind) {
+    case BasicBlockParent:
+      return U.BBRef;
+    case FunctionParent:
+      return U.BFRef;
+    }
+    llvm_unreachable("");
+  }
+
+  uint64_t getAddress() const {
+    switch (ParentKind) {
+    case BasicBlockParent:
+      return U.BBRef.getAddress();
+    case FunctionParent:
+      return U.BFRef.getAddress();
+    }
+    llvm_unreachable("");
+  }
+
+  BinaryFunction *getFunction() const {
+    switch (ParentKind) {
+    case FunctionParent:
+      return U.BFRef.BF;
+    case BasicBlockParent:
+      return U.BBRef.BB->getFunction();
+    }
+    llvm_unreachable("");
+  }
+
+  BinaryBasicBlock *getBasicBlock() const {
+    switch (ParentKind) {
+    case FunctionParent:
+      return nullptr;
+    case BasicBlockParent:
+      return U.BBRef.BB;
+    }
+    llvm_unreachable("");
+  }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);
+
+struct GeneralDiagnostic {
+  std::string Text;
+  GeneralDiagnostic(const std::string &Text) : Text(Text) {}
+  bool operator==(const GeneralDiagnostic &RHS) const {
+    return Text == RHS.Text;
+  }
+};
+
+struct NonPacProtectedRetAnnotation {
----------------
kbeyls wrote:

With the changes in commit f44f9bf, the code now has quite a few different classes (and variables?) etc with long names containing the string "NonPacProtectedRet".
Could this be improved by introduced a namespace `NonPacProtectedRet` or `NonPacProtectedRetAnalysis`, so that we can remove that part from class names for classes in the namespace. This should make the code substantially easier to read?

https://github.com/llvm/llvm-project/pull/122304


More information about the llvm-commits mailing list