[llvm] [BOLT][binary-analysis] Add initial pac-ret gadget scanner (PR #122304)

Anatoly Trosinenko via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 03:37:32 PST 2025


================
@@ -0,0 +1,522 @@
+//===- bolt/Passes/NonPacProtectedRetAnalysis.cpp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that looks for any AArch64 return instructions
+// that may not be protected by PAuth authentication instructions when needed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/NonPacProtectedRetAnalysis.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Passes/DataflowAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Format.h"
+#include <memory>
+
+#define DEBUG_TYPE "bolt-nonpacprotectedret"
+
+namespace llvm {
+namespace bolt {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) {
+  OS << "MCInstBBRef<";
+  if (Ref.BB == nullptr)
+    OS << "BB:(null)";
+  else
+    OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex;
+  OS << ">";
+  return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) {
+  OS << "MCInstBFRef<";
+  if (Ref.BF == nullptr)
+    OS << "BF:(null)";
+  else
+    OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset();
+  OS << ">";
+  return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) {
+  switch (Ref.ParentKind) {
+  case MCInstReference::BasicBlockParent:
+    OS << Ref.U.BBRef;
+    return OS;
+  case MCInstReference::FunctionParent:
+    OS << Ref.U.BFRef;
+    return OS;
+  }
+  llvm_unreachable("");
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const GeneralDiagnostic &Diag) {
+  OS << "diag<'" << Diag.Text << "'>";
+  return OS;
+}
+
+namespace NonPacProtectedRetAnalysis {
+
+// The security property that is checked is:
+// When a register is used as the address to jump to in a return instruction,
+// that register must either:
+// (a) never be changed within this function, i.e. have the same value as when
+//     the function started, or
+// (b) the last write to the register must be by an authentication instruction.
+
+// This property is checked by using dataflow analysis to keep track of which
+// registers have been written (def-ed), since last authenticated. Those are
+// exactly the registers containing values that should not be trusted (as they
+// could have changed since the last time they were authenticated). For pac-ret,
+// any return instruction using such a register is a gadget to be reported. For
+// PAuthABI, probably at least any indirect control flow using such a register
+// should be reported.
+
+// Furthermore, when producing a diagnostic for a found non-pac-ret protected
+// return, the analysis also lists the last instructions that wrote to the
+// register used in the return instruction.
+// The total set of registers used in return instructions in a given function is
+// small. It almost always is just `X30`.
+// In order to reduce the memory consumption of storing this additional state
+// during the dataflow analysis, this is computed by running the dataflow
+// analysis twice:
+// 1. In the first run, the dataflow analysis only keeps track of the security
+//    property: i.e. which registers have been overwritten since the last
+//    time they've been authenticated.
+// 2. If the first run finds any return instructions using a register last
+//    written by a non-authenticating instruction, the dataflow analysis will
+//    be run a second time. The first run will return which registers are used
+//    in the gadgets to be reported. This information is used in the second run
+//    to also track which instructions last wrote to those registers.
+
+struct State {
+  /// A BitVector containing the registers that have been clobbered, and
+  /// not authenticated.
+  BitVector NonAutClobRegs;
+  /// A vector of sets, only used in the second data flow run.
+  /// Each element in the vector represents one of the registers for which we
+  /// track the set of last instructions that wrote to this register. For
+  /// pac-ret analysis, the expectation is that almost all return instructions
+  /// only use register `X30`, and therefore, this vector will probably have
+  /// length 1 in the second run.
+  std::vector<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg;
+  State() {}
+  State(unsigned NumRegs, unsigned NumRegsToTrack)
+      : NonAutClobRegs(NumRegs), LastInstWritingReg(NumRegsToTrack) {}
+  State &operator|=(const State &StateIn) {
+    NonAutClobRegs |= StateIn.NonAutClobRegs;
+    for (unsigned I = 0; I < LastInstWritingReg.size(); ++I)
+      for (const MCInst *J : StateIn.LastInstWritingReg[I])
+        LastInstWritingReg[I].insert(J);
+    return *this;
+  }
+  bool operator==(const State &RHS) const {
+    return NonAutClobRegs == RHS.NonAutClobRegs &&
+           LastInstWritingReg == RHS.LastInstWritingReg;
+  }
+  bool operator!=(const State &RHS) const { return !((*this) == RHS); }
+};
+
+static void printLastInsts(
+    raw_ostream &OS,
+    const std::vector<SmallPtrSet<const MCInst *, 4>> &LastInstWritingReg) {
+  OS << "Insts: ";
+  for (unsigned I = 0; I < LastInstWritingReg.size(); ++I) {
+    auto &Set = LastInstWritingReg[I];
+    OS << "[" << I << "](";
+    for (const MCInst *MCInstP : Set)
+      OS << MCInstP << " ";
+    OS << ")";
+  }
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const State &S) {
+  OS << "pacret-state<";
+  OS << "NonAutClobRegs: " << S.NonAutClobRegs << ", ";
+  printLastInsts(OS, S.LastInstWritingReg);
+  OS << ">";
+  return OS;
+}
+
+class PacStatePrinter {
+public:
+  void print(raw_ostream &OS, const State &State) const;
+  explicit PacStatePrinter(const BinaryContext &BC) : BC(BC) {}
+
+private:
+  const BinaryContext &BC;
+};
+
+void PacStatePrinter::print(raw_ostream &OS, const State &S) const {
+  RegStatePrinter RegStatePrinter(BC);
+  OS << "pacret-state<";
+  OS << "NonAutClobRegs: ";
+  RegStatePrinter.print(OS, S.NonAutClobRegs);
+  OS << ", ";
+  printLastInsts(OS, S.LastInstWritingReg);
+  OS << ">";
+}
+
+class PacRetAnalysis
+    : public DataflowAnalysis<PacRetAnalysis, State, /*Backward=*/false,
+                              PacStatePrinter> {
+  using Parent =
+      DataflowAnalysis<PacRetAnalysis, State, false, PacStatePrinter>;
+  friend Parent;
+
+public:
+  PacRetAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId,
+                 const std::vector<MCPhysReg> &RegsToTrackInstsFor)
+      : Parent(BF, AllocId), NumRegs(BF.getBinaryContext().MRI->getNumRegs()),
+        RegsToTrackInstsFor(RegsToTrackInstsFor),
+        TrackingLastInsts(!RegsToTrackInstsFor.empty()),
+        Reg2StateIdx(RegsToTrackInstsFor.empty()
+                         ? 0
+                         : *llvm::max_element(RegsToTrackInstsFor) + 1,
+                     -1) {
+    for (unsigned I = 0; I < RegsToTrackInstsFor.size(); ++I)
+      Reg2StateIdx[RegsToTrackInstsFor[I]] = I;
+  }
+  virtual ~PacRetAnalysis() {}
+
+protected:
+  const unsigned NumRegs;
+  /// RegToTrackInstsFor is the set of registers for which the dataflow analysis
+  /// must compute which the last set of instructions writing to it are.
+  const std::vector<MCPhysReg> RegsToTrackInstsFor;
+  const bool TrackingLastInsts;
+  /// Reg2StateIdx maps Register to the index in the vector used in State to
+  /// track which instructions last wrote to this register.
+  std::vector<uint16_t> Reg2StateIdx;
+
+  SmallPtrSet<const MCInst *, 4> &lastWritingInsts(State &S,
+                                                   MCPhysReg Reg) const {
+    assert(Reg < Reg2StateIdx.size());
----------------
atrosinenko wrote:

> In the latest commit I ended up adding `assert(isTrackingReg(Reg));`

This looks reasonable, thank you!

> IIUC assert(isTrackingReg(Reg)) would catch the same overflow as well as reference to S.LastInstWritingReg[-1]

My wording was not clear, sorry. My idea was that `isTrackingReg(Reg)` assertion is strictly stronger than `Reg < Reg2StateIdx.size()`, meaning `isTrackingReg(Reg)` implies `Reg < Reg2StateIdx.size()` and additionally prevents accessing `S.LastInstWritingReg[-1]`. But even assuming this is correct, it is not obvious anyway, so adding another assertion looks fine.

> Or maybe just use `assert(Reg < Reg2StateIdx.size() && Reg2StateIdx[Reg])`

Sorry, that message should mention `... && Reg2StateIdx[Reg] != -1`, of course.

https://github.com/llvm/llvm-project/pull/122304


More information about the llvm-commits mailing list