[llvm] [BOLT] Gadget scanner: refactor analysis of RET instructions (PR #131897)

Anatoly Trosinenko via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 21 01:22:00 PDT 2025


https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/131897

>From d0b3760ee4bac6b3c17a0423468cea452c256358 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Mon, 17 Mar 2025 19:28:25 +0300
Subject: [PATCH 1/2] [BOLT] Gadget scanner: refactor analysis of RET
 instructions

In preparation for implementing detection of more gadget kinds,
refactor checking for non-protected return instructions.
---
 bolt/include/bolt/Passes/PAuthGadgetScanner.h |  23 ++-
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 138 ++++++++++--------
 2 files changed, 95 insertions(+), 66 deletions(-)

diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index 2d8109f8ca43b..f102f1080e2e8 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -199,19 +199,34 @@ struct Report {
   virtual void generateReport(raw_ostream &OS,
                               const BinaryContext &BC) const = 0;
 
+  virtual const ArrayRef<MCPhysReg> getAffectedRegisters() const { return {}; }
+  virtual void
+  setOverwritingInstrs(const std::vector<MCInstReference> &Instrs) {}
+
   void printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
                       StringRef IssueKind) const;
 };
 
 struct GadgetReport : public Report {
   const GadgetKind &Kind;
+  SmallVector<MCPhysReg> AffectedRegisters;
   std::vector<MCInstReference> OverwritingInstrs;
 
   GadgetReport(const GadgetKind &Kind, MCInstReference Location,
-               std::vector<MCInstReference> OverwritingInstrs)
-      : Report(Location), Kind(Kind), OverwritingInstrs(OverwritingInstrs) {}
+               const BitVector &AffectedRegisters)
+      : Report(Location), Kind(Kind),
+        AffectedRegisters(AffectedRegisters.set_bits()) {}
 
   void generateReport(raw_ostream &OS, const BinaryContext &BC) const override;
+
+  const ArrayRef<MCPhysReg> getAffectedRegisters() const override {
+    return AffectedRegisters;
+  }
+
+  void
+  setOverwritingInstrs(const std::vector<MCInstReference> &Instrs) override {
+    OverwritingInstrs = Instrs;
+  }
 };
 
 /// Report with a free-form message attached.
@@ -224,7 +239,6 @@ struct GenericReport : public Report {
 };
 
 struct FunctionAnalysisResult {
-  SmallSet<MCPhysReg, 1> RegistersAffected;
   std::vector<std::shared_ptr<Report>> Diagnostics;
 };
 
@@ -232,8 +246,7 @@ class Analysis : public BinaryFunctionPass {
   void runOnFunction(BinaryFunction &Function,
                      MCPlusBuilder::AllocatorIdTy AllocatorId);
   FunctionAnalysisResult
-  computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF,
-                 MCPlusBuilder::AllocatorIdTy AllocatorId);
+  computeDfState(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId);
 
   std::map<const BinaryFunction *, FunctionAnalysisResult> AnalysisResults;
   std::mutex AnalysisResultsMutex;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 6f29399691cb8..9bffd59b01070 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -353,7 +353,7 @@ class PacRetAnalysis
 public:
   std::vector<MCInstReference>
   getLastClobberingInsts(const MCInst Ret, BinaryFunction &BF,
-                         const BitVector &UsedDirtyRegs) const {
+                         const ArrayRef<MCPhysReg> UsedDirtyRegs) const {
     if (RegsToTrackInstsFor.empty())
       return {};
     auto MaybeState = getStateAt(Ret);
@@ -362,7 +362,7 @@ class PacRetAnalysis
     const State &S = *MaybeState;
     // Due to aliasing registers, multiple registers may have been tracked.
     std::set<const MCInst *> LastWritingInsts;
-    for (MCPhysReg TrackedReg : UsedDirtyRegs.set_bits()) {
+    for (MCPhysReg TrackedReg : UsedDirtyRegs) {
       for (const MCInst *Inst : lastWritingInsts(S, TrackedReg))
         LastWritingInsts.insert(Inst);
     }
@@ -376,57 +376,81 @@ class PacRetAnalysis
   }
 };
 
+static std::shared_ptr<Report> tryCheckReturn(const BinaryContext &BC,
+                                              const MCInstReference &Inst,
+                                              const State &S) {
+  static const GadgetKind RetKind("non-protected ret found");
+  if (!BC.MIB->isReturn(Inst))
+    return nullptr;
+
+  ErrorOr<MCPhysReg> MaybeRetReg = BC.MIB->getRegUsedAsRetDest(Inst);
+  if (MaybeRetReg.getError()) {
+    return std::make_shared<GenericReport>(
+        Inst, "Warning: pac-ret analysis could not analyze this return "
+              "instruction");
+  }
+  MCPhysReg RetReg = *MaybeRetReg;
+  LLVM_DEBUG({
+    traceInst(BC, "Found RET inst", Inst);
+    traceReg(BC, "RetReg", RetReg);
+    traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
+  });
+  if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
+    return nullptr;
+  BitVector UsedDirtyRegs = S.NonAutClobRegs;
+  LLVM_DEBUG({ traceRegMask(BC, "NonAutClobRegs at Ret", UsedDirtyRegs); });
+  UsedDirtyRegs &= BC.MIB->getAliases(RetReg, /*OnlySmaller=*/true);
+  LLVM_DEBUG({ traceRegMask(BC, "Intersection with RetReg", UsedDirtyRegs); });
+  if (!UsedDirtyRegs.any())
+    return nullptr;
+
+  return std::make_shared<GadgetReport>(RetKind, Inst, UsedDirtyRegs);
+}
+
 FunctionAnalysisResult
-Analysis::computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF,
+Analysis::computeDfState(BinaryFunction &BF,
                          MCPlusBuilder::AllocatorIdTy AllocatorId) {
+  FunctionAnalysisResult Result;
+
+  PacRetAnalysis PRA(BF, AllocatorId, {});
   PRA.run();
   LLVM_DEBUG({
     dbgs() << " After PacRetAnalysis:\n";
     BF.dump();
   });
 
-  FunctionAnalysisResult Result;
-  // Now scan the CFG for non-authenticating return instructions that use an
-  // overwritten, non-authenticated register as return address.
   BinaryContext &BC = BF.getBinaryContext();
   for (BinaryBasicBlock &BB : BF) {
-    for (int64_t I = BB.size() - 1; I >= 0; --I) {
-      MCInst &Inst = BB.getInstructionAtIndex(I);
-      if (BC.MIB->isReturn(Inst)) {
-        ErrorOr<MCPhysReg> MaybeRetReg = BC.MIB->getRegUsedAsRetDest(Inst);
-        if (MaybeRetReg.getError()) {
-          Result.Diagnostics.push_back(std::make_shared<GenericReport>(
-              MCInstInBBReference(&BB, I),
-              "Warning: pac-ret analysis could not analyze this return "
-              "instruction"));
-          continue;
-        }
-        MCPhysReg RetReg = *MaybeRetReg;
-        LLVM_DEBUG({
-          traceInst(BC, "Found RET inst", Inst);
-          traceReg(BC, "RetReg", RetReg);
-          traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
-        });
-        if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
-          break;
-        BitVector UsedDirtyRegs = PRA.getStateAt(Inst)->NonAutClobRegs;
-        LLVM_DEBUG(
-            { traceRegMask(BC, "NonAutClobRegs at Ret", UsedDirtyRegs); });
-        UsedDirtyRegs &= BC.MIB->getAliases(RetReg, /*OnlySmaller=*/true);
-        LLVM_DEBUG(
-            { traceRegMask(BC, "Intersection with RetReg", UsedDirtyRegs); });
-        if (UsedDirtyRegs.any()) {
-          static const GadgetKind RetKind("non-protected ret found");
-          // This return instruction needs to be reported
-          Result.Diagnostics.push_back(std::make_shared<GadgetReport>(
-              RetKind, MCInstInBBReference(&BB, I),
-              PRA.getLastClobberingInsts(Inst, BF, UsedDirtyRegs)));
-          for (MCPhysReg RetRegWithGadget : UsedDirtyRegs.set_bits())
-            Result.RegistersAffected.insert(RetRegWithGadget);
-        }
-      }
+    for (int64_t I = 0, E = BB.size(); I < E; ++I) {
+      MCInstReference Inst(&BB, I);
+      const State &S = *PRA.getStateAt(Inst);
+
+      if (auto Report = tryCheckReturn(BC, Inst, S))
+        Result.Diagnostics.push_back(Report);
     }
   }
+
+  if (Result.Diagnostics.empty())
+    return Result;
+
+  // Redo the analysis, but now also track which instructions last wrote
+  // to any of the registers in RetRegsWithGadgets, so that better
+  // diagnostics can be produced.
+
+  SmallSet<MCPhysReg, 4> RegsToTrack;
+  for (auto Report : Result.Diagnostics)
+    for (MCPhysReg Reg : Report->getAffectedRegisters())
+      RegsToTrack.insert(Reg);
+
+  std::vector<MCPhysReg> RegsToTrackVec(RegsToTrack.begin(), RegsToTrack.end());
+
+  PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrackVec);
+  PRWIA.run();
+  for (auto Report : Result.Diagnostics) {
+    Report->setOverwritingInstrs(PRWIA.getLastClobberingInsts(
+        Report->Location, BF, Report->getAffectedRegisters()));
+  }
+
   return Result;
 }
 
@@ -438,27 +462,19 @@ void Analysis::runOnFunction(BinaryFunction &BF,
     BF.dump();
   });
 
-  if (BF.hasCFG()) {
-    PacRetAnalysis PRA(BF, AllocatorId, {});
-    FunctionAnalysisResult FAR = computeDfState(PRA, BF, AllocatorId);
-    if (!FAR.RegistersAffected.empty()) {
-      // Redo the analysis, but now also track which instructions last wrote
-      // to any of the registers in RetRegsWithGadgets, so that better
-      // diagnostics can be produced.
-      std::vector<MCPhysReg> RegsToTrack;
-      for (MCPhysReg R : FAR.RegistersAffected)
-        RegsToTrack.push_back(R);
-      PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrack);
-      FAR = computeDfState(PRWIA, BF, AllocatorId);
-    }
+  if (!BF.hasCFG())
+    return;
 
-    // `runOnFunction` is typically getting called from multiple threads in
-    // parallel. Therefore, use a lock to avoid data races when storing the
-    // result of the analysis in the `AnalysisResults` map.
-    {
-      std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
-      AnalysisResults[&BF] = FAR;
-    }
+  FunctionAnalysisResult FAR = computeDfState(BF, AllocatorId);
+  if (FAR.Diagnostics.empty())
+    return;
+
+  // `runOnFunction` is typically getting called from multiple threads in
+  // parallel. Therefore, use a lock to avoid data races when storing the
+  // result of the analysis in the `AnalysisResults` map.
+  {
+    std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
+    AnalysisResults[&BF] = FAR;
   }
 }
 

>From d9dbf0ac4155ce853436bf1f2aa12b4abb7bab5e Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 20 Mar 2025 15:01:34 +0300
Subject: [PATCH 2/2] Fix debug output

---
 bolt/lib/Passes/PAuthGadgetScanner.cpp                 |  7 +++++++
 .../binary-analysis/AArch64/gs-pauth-debug-output.s    | 10 +++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 9bffd59b01070..4f7be17327b49 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -446,7 +446,14 @@ Analysis::computeDfState(BinaryFunction &BF,
 
   PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrackVec);
   PRWIA.run();
+  LLVM_DEBUG({
+    dbgs() << " After detailed PacRetAnalysis:\n";
+    BF.dump();
+  });
+
   for (auto Report : Result.Diagnostics) {
+    LLVM_DEBUG(
+        { traceInst(BC, "Attaching clobbering info to", Report->Location); });
     Report->setOverwritingInstrs(PRWIA.getLastClobberingInsts(
         Report->Location, BF, Report->getAffectedRegisters()));
   }
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index a1546f685d28c..18e0fbfb850e7 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -118,20 +118,16 @@ clobber:
 // CHECK-NEXT:   .. result: (pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
 // CHECK-NEXT:  PacRetAnalysis::ComputeNext(   ret     x30, pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
 // CHECK-NEXT:   .. result: (pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
-// CHECK-NEXT:  After PacRetAnalysis:
+// CHECK-NEXT:  After detailed PacRetAnalysis:
 // CHECK-NEXT: Binary Function "clobber"  {
 // ...
 // CHECK:      End of Function "clobber"
 
 // The analysis was re-computed with register tracking, as an issue was found in this function.
-// Re-checking the instructions:
+// Iterating over the reports and attaching clobbering info:
 
 // CHECK-EMPTY:
-// CHECK-NEXT:   Found RET inst:     00000000:         ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
-// CHECK-NEXT:     RetReg: LR
-// CHECK-NEXT:     Authenticated reg: (none)
-// CHECK-NEXT:     NonAutClobRegs at Ret: W30
-// CHECK-NEXT:     Intersection with RetReg: W30
+// CHECK-NEXT:   Attaching clobbering info to:     00000000:         ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
 
 
 // CHECK-LABEL:Analyzing in function main, AllocatorId 1



More information about the llvm-commits mailing list