[llvm] [BOLT] Gadget scanner: refactor analysis of RET instructions (PR #131897)
Anatoly Trosinenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 08:52:55 PDT 2025
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/131897
>From d0b3760ee4bac6b3c17a0423468cea452c256358 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Mon, 17 Mar 2025 19:28:25 +0300
Subject: [PATCH 1/3] [BOLT] Gadget scanner: refactor analysis of RET
instructions
In preparation for implementing detection of more gadget kinds,
refactor checking for non-protected return instructions.
---
bolt/include/bolt/Passes/PAuthGadgetScanner.h | 23 ++-
bolt/lib/Passes/PAuthGadgetScanner.cpp | 138 ++++++++++--------
2 files changed, 95 insertions(+), 66 deletions(-)
diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index 2d8109f8ca43b..f102f1080e2e8 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -199,19 +199,34 @@ struct Report {
virtual void generateReport(raw_ostream &OS,
const BinaryContext &BC) const = 0;
+ virtual const ArrayRef<MCPhysReg> getAffectedRegisters() const { return {}; }
+ virtual void
+ setOverwritingInstrs(const std::vector<MCInstReference> &Instrs) {}
+
void printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
StringRef IssueKind) const;
};
struct GadgetReport : public Report {
const GadgetKind &Kind;
+ SmallVector<MCPhysReg> AffectedRegisters;
std::vector<MCInstReference> OverwritingInstrs;
GadgetReport(const GadgetKind &Kind, MCInstReference Location,
- std::vector<MCInstReference> OverwritingInstrs)
- : Report(Location), Kind(Kind), OverwritingInstrs(OverwritingInstrs) {}
+ const BitVector &AffectedRegisters)
+ : Report(Location), Kind(Kind),
+ AffectedRegisters(AffectedRegisters.set_bits()) {}
void generateReport(raw_ostream &OS, const BinaryContext &BC) const override;
+
+ const ArrayRef<MCPhysReg> getAffectedRegisters() const override {
+ return AffectedRegisters;
+ }
+
+ void
+ setOverwritingInstrs(const std::vector<MCInstReference> &Instrs) override {
+ OverwritingInstrs = Instrs;
+ }
};
/// Report with a free-form message attached.
@@ -224,7 +239,6 @@ struct GenericReport : public Report {
};
struct FunctionAnalysisResult {
- SmallSet<MCPhysReg, 1> RegistersAffected;
std::vector<std::shared_ptr<Report>> Diagnostics;
};
@@ -232,8 +246,7 @@ class Analysis : public BinaryFunctionPass {
void runOnFunction(BinaryFunction &Function,
MCPlusBuilder::AllocatorIdTy AllocatorId);
FunctionAnalysisResult
- computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF,
- MCPlusBuilder::AllocatorIdTy AllocatorId);
+ computeDfState(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId);
std::map<const BinaryFunction *, FunctionAnalysisResult> AnalysisResults;
std::mutex AnalysisResultsMutex;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 6f29399691cb8..9bffd59b01070 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -353,7 +353,7 @@ class PacRetAnalysis
public:
std::vector<MCInstReference>
getLastClobberingInsts(const MCInst Ret, BinaryFunction &BF,
- const BitVector &UsedDirtyRegs) const {
+ const ArrayRef<MCPhysReg> UsedDirtyRegs) const {
if (RegsToTrackInstsFor.empty())
return {};
auto MaybeState = getStateAt(Ret);
@@ -362,7 +362,7 @@ class PacRetAnalysis
const State &S = *MaybeState;
// Due to aliasing registers, multiple registers may have been tracked.
std::set<const MCInst *> LastWritingInsts;
- for (MCPhysReg TrackedReg : UsedDirtyRegs.set_bits()) {
+ for (MCPhysReg TrackedReg : UsedDirtyRegs) {
for (const MCInst *Inst : lastWritingInsts(S, TrackedReg))
LastWritingInsts.insert(Inst);
}
@@ -376,57 +376,81 @@ class PacRetAnalysis
}
};
+static std::shared_ptr<Report> tryCheckReturn(const BinaryContext &BC,
+ const MCInstReference &Inst,
+ const State &S) {
+ static const GadgetKind RetKind("non-protected ret found");
+ if (!BC.MIB->isReturn(Inst))
+ return nullptr;
+
+ ErrorOr<MCPhysReg> MaybeRetReg = BC.MIB->getRegUsedAsRetDest(Inst);
+ if (MaybeRetReg.getError()) {
+ return std::make_shared<GenericReport>(
+ Inst, "Warning: pac-ret analysis could not analyze this return "
+ "instruction");
+ }
+ MCPhysReg RetReg = *MaybeRetReg;
+ LLVM_DEBUG({
+ traceInst(BC, "Found RET inst", Inst);
+ traceReg(BC, "RetReg", RetReg);
+ traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
+ });
+ if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
+ return nullptr;
+ BitVector UsedDirtyRegs = S.NonAutClobRegs;
+ LLVM_DEBUG({ traceRegMask(BC, "NonAutClobRegs at Ret", UsedDirtyRegs); });
+ UsedDirtyRegs &= BC.MIB->getAliases(RetReg, /*OnlySmaller=*/true);
+ LLVM_DEBUG({ traceRegMask(BC, "Intersection with RetReg", UsedDirtyRegs); });
+ if (!UsedDirtyRegs.any())
+ return nullptr;
+
+ return std::make_shared<GadgetReport>(RetKind, Inst, UsedDirtyRegs);
+}
+
FunctionAnalysisResult
-Analysis::computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF,
+Analysis::computeDfState(BinaryFunction &BF,
MCPlusBuilder::AllocatorIdTy AllocatorId) {
+ FunctionAnalysisResult Result;
+
+ PacRetAnalysis PRA(BF, AllocatorId, {});
PRA.run();
LLVM_DEBUG({
dbgs() << " After PacRetAnalysis:\n";
BF.dump();
});
- FunctionAnalysisResult Result;
- // Now scan the CFG for non-authenticating return instructions that use an
- // overwritten, non-authenticated register as return address.
BinaryContext &BC = BF.getBinaryContext();
for (BinaryBasicBlock &BB : BF) {
- for (int64_t I = BB.size() - 1; I >= 0; --I) {
- MCInst &Inst = BB.getInstructionAtIndex(I);
- if (BC.MIB->isReturn(Inst)) {
- ErrorOr<MCPhysReg> MaybeRetReg = BC.MIB->getRegUsedAsRetDest(Inst);
- if (MaybeRetReg.getError()) {
- Result.Diagnostics.push_back(std::make_shared<GenericReport>(
- MCInstInBBReference(&BB, I),
- "Warning: pac-ret analysis could not analyze this return "
- "instruction"));
- continue;
- }
- MCPhysReg RetReg = *MaybeRetReg;
- LLVM_DEBUG({
- traceInst(BC, "Found RET inst", Inst);
- traceReg(BC, "RetReg", RetReg);
- traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
- });
- if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
- break;
- BitVector UsedDirtyRegs = PRA.getStateAt(Inst)->NonAutClobRegs;
- LLVM_DEBUG(
- { traceRegMask(BC, "NonAutClobRegs at Ret", UsedDirtyRegs); });
- UsedDirtyRegs &= BC.MIB->getAliases(RetReg, /*OnlySmaller=*/true);
- LLVM_DEBUG(
- { traceRegMask(BC, "Intersection with RetReg", UsedDirtyRegs); });
- if (UsedDirtyRegs.any()) {
- static const GadgetKind RetKind("non-protected ret found");
- // This return instruction needs to be reported
- Result.Diagnostics.push_back(std::make_shared<GadgetReport>(
- RetKind, MCInstInBBReference(&BB, I),
- PRA.getLastClobberingInsts(Inst, BF, UsedDirtyRegs)));
- for (MCPhysReg RetRegWithGadget : UsedDirtyRegs.set_bits())
- Result.RegistersAffected.insert(RetRegWithGadget);
- }
- }
+ for (int64_t I = 0, E = BB.size(); I < E; ++I) {
+ MCInstReference Inst(&BB, I);
+ const State &S = *PRA.getStateAt(Inst);
+
+ if (auto Report = tryCheckReturn(BC, Inst, S))
+ Result.Diagnostics.push_back(Report);
}
}
+
+ if (Result.Diagnostics.empty())
+ return Result;
+
+ // Redo the analysis, but now also track which instructions last wrote
+ // to any of the registers in RetRegsWithGadgets, so that better
+ // diagnostics can be produced.
+
+ SmallSet<MCPhysReg, 4> RegsToTrack;
+ for (auto Report : Result.Diagnostics)
+ for (MCPhysReg Reg : Report->getAffectedRegisters())
+ RegsToTrack.insert(Reg);
+
+ std::vector<MCPhysReg> RegsToTrackVec(RegsToTrack.begin(), RegsToTrack.end());
+
+ PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrackVec);
+ PRWIA.run();
+ for (auto Report : Result.Diagnostics) {
+ Report->setOverwritingInstrs(PRWIA.getLastClobberingInsts(
+ Report->Location, BF, Report->getAffectedRegisters()));
+ }
+
return Result;
}
@@ -438,27 +462,19 @@ void Analysis::runOnFunction(BinaryFunction &BF,
BF.dump();
});
- if (BF.hasCFG()) {
- PacRetAnalysis PRA(BF, AllocatorId, {});
- FunctionAnalysisResult FAR = computeDfState(PRA, BF, AllocatorId);
- if (!FAR.RegistersAffected.empty()) {
- // Redo the analysis, but now also track which instructions last wrote
- // to any of the registers in RetRegsWithGadgets, so that better
- // diagnostics can be produced.
- std::vector<MCPhysReg> RegsToTrack;
- for (MCPhysReg R : FAR.RegistersAffected)
- RegsToTrack.push_back(R);
- PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrack);
- FAR = computeDfState(PRWIA, BF, AllocatorId);
- }
+ if (!BF.hasCFG())
+ return;
- // `runOnFunction` is typically getting called from multiple threads in
- // parallel. Therefore, use a lock to avoid data races when storing the
- // result of the analysis in the `AnalysisResults` map.
- {
- std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
- AnalysisResults[&BF] = FAR;
- }
+ FunctionAnalysisResult FAR = computeDfState(BF, AllocatorId);
+ if (FAR.Diagnostics.empty())
+ return;
+
+ // `runOnFunction` is typically getting called from multiple threads in
+ // parallel. Therefore, use a lock to avoid data races when storing the
+ // result of the analysis in the `AnalysisResults` map.
+ {
+ std::lock_guard<std::mutex> Lock(AnalysisResultsMutex);
+ AnalysisResults[&BF] = FAR;
}
}
>From d9dbf0ac4155ce853436bf1f2aa12b4abb7bab5e Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 20 Mar 2025 15:01:34 +0300
Subject: [PATCH 2/3] Fix debug output
---
bolt/lib/Passes/PAuthGadgetScanner.cpp | 7 +++++++
.../binary-analysis/AArch64/gs-pauth-debug-output.s | 10 +++-------
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 9bffd59b01070..4f7be17327b49 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -446,7 +446,14 @@ Analysis::computeDfState(BinaryFunction &BF,
PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrackVec);
PRWIA.run();
+ LLVM_DEBUG({
+ dbgs() << " After detailed PacRetAnalysis:\n";
+ BF.dump();
+ });
+
for (auto Report : Result.Diagnostics) {
+ LLVM_DEBUG(
+ { traceInst(BC, "Attaching clobbering info to", Report->Location); });
Report->setOverwritingInstrs(PRWIA.getLastClobberingInsts(
Report->Location, BF, Report->getAffectedRegisters()));
}
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index a1546f685d28c..18e0fbfb850e7 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -118,20 +118,16 @@ clobber:
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
// CHECK-NEXT: .. result: (pacret-state<NonAutClobRegs: W30 , Insts: [0](0x{{[0-9a-f]+}} )>)
-// CHECK-NEXT: After PacRetAnalysis:
+// CHECK-NEXT: After detailed PacRetAnalysis:
// CHECK-NEXT: Binary Function "clobber" {
// ...
// CHECK: End of Function "clobber"
// The analysis was re-computed with register tracking, as an issue was found in this function.
-// Re-checking the instructions:
+// Iterating over the reports and attaching clobbering info:
// CHECK-EMPTY:
-// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
-// CHECK-NEXT: RetReg: LR
-// CHECK-NEXT: Authenticated reg: (none)
-// CHECK-NEXT: NonAutClobRegs at Ret: W30
-// CHECK-NEXT: Intersection with RetReg: W30
+// CHECK-NEXT: Attaching clobbering info to: 00000000: ret # PacRetAnalysis: pacret-state<NonAutClobRegs: BitVector, Insts: [0](0x{{[0-9a-f]+}} )>
// CHECK-LABEL:Analyzing in function main, AllocatorId 1
>From f8b47c592f3d46d892f70430f2689d4941e75b4d Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Fri, 21 Mar 2025 18:19:03 +0300
Subject: [PATCH 3/3] Address review comments
---
bolt/include/bolt/Passes/PAuthGadgetScanner.h | 26 +++++++----
bolt/lib/Passes/PAuthGadgetScanner.cpp | 43 +++++++++++--------
2 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
index f102f1080e2e8..1a8abffa09c46 100644
--- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h
+++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h
@@ -199,18 +199,25 @@ struct Report {
virtual void generateReport(raw_ostream &OS,
const BinaryContext &BC) const = 0;
+ // The two methods below are called by Analysis::computeDetailedInfo when
+ // iterating over the reports.
virtual const ArrayRef<MCPhysReg> getAffectedRegisters() const { return {}; }
- virtual void
- setOverwritingInstrs(const std::vector<MCInstReference> &Instrs) {}
+ virtual void setOverwritingInstrs(const ArrayRef<MCInstReference> Instrs) {}
void printBasicInfo(raw_ostream &OS, const BinaryContext &BC,
StringRef IssueKind) const;
};
struct GadgetReport : public Report {
+ // The particular kind of gadget that is detected.
const GadgetKind &Kind;
+ // The set of registers related to this gadget report (possibly empty).
SmallVector<MCPhysReg> AffectedRegisters;
- std::vector<MCInstReference> OverwritingInstrs;
+ // The instructions that clobber the affected registers.
+ // There is no one-to-one correspondence with AffectedRegisters: for example,
+ // the same register can be overwritten by different instructions in different
+ // preceding basic blocks.
+ SmallVector<MCInstReference> OverwritingInstrs;
GadgetReport(const GadgetKind &Kind, MCInstReference Location,
const BitVector &AffectedRegisters)
@@ -223,9 +230,8 @@ struct GadgetReport : public Report {
return AffectedRegisters;
}
- void
- setOverwritingInstrs(const std::vector<MCInstReference> &Instrs) override {
- OverwritingInstrs = Instrs;
+ void setOverwritingInstrs(const ArrayRef<MCInstReference> Instrs) override {
+ OverwritingInstrs.assign(Instrs.begin(), Instrs.end());
}
};
@@ -245,8 +251,12 @@ struct FunctionAnalysisResult {
class Analysis : public BinaryFunctionPass {
void runOnFunction(BinaryFunction &Function,
MCPlusBuilder::AllocatorIdTy AllocatorId);
- FunctionAnalysisResult
- computeDfState(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId);
+ FunctionAnalysisResult findGadgets(BinaryFunction &BF,
+ MCPlusBuilder::AllocatorIdTy AllocatorId);
+
+ void computeDetailedInfo(BinaryFunction &BF,
+ MCPlusBuilder::AllocatorIdTy AllocatorId,
+ FunctionAnalysisResult &Result);
std::map<const BinaryFunction *, FunctionAnalysisResult> AnalysisResults;
std::mutex AnalysisResultsMutex;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 4f7be17327b49..84c209ac838f8 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -376,9 +376,9 @@ class PacRetAnalysis
}
};
-static std::shared_ptr<Report> tryCheckReturn(const BinaryContext &BC,
- const MCInstReference &Inst,
- const State &S) {
+static std::shared_ptr<Report>
+shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
+ const State &S) {
static const GadgetKind RetKind("non-protected ret found");
if (!BC.MIB->isReturn(Inst))
return nullptr;
@@ -408,8 +408,8 @@ static std::shared_ptr<Report> tryCheckReturn(const BinaryContext &BC,
}
FunctionAnalysisResult
-Analysis::computeDfState(BinaryFunction &BF,
- MCPlusBuilder::AllocatorIdTy AllocatorId) {
+Analysis::findGadgets(BinaryFunction &BF,
+ MCPlusBuilder::AllocatorIdTy AllocatorId) {
FunctionAnalysisResult Result;
PacRetAnalysis PRA(BF, AllocatorId, {});
@@ -425,25 +425,25 @@ Analysis::computeDfState(BinaryFunction &BF,
MCInstReference Inst(&BB, I);
const State &S = *PRA.getStateAt(Inst);
- if (auto Report = tryCheckReturn(BC, Inst, S))
+ if (auto Report = shouldReportReturnGadget(BC, Inst, S))
Result.Diagnostics.push_back(Report);
}
}
+ return Result;
+}
- if (Result.Diagnostics.empty())
- return Result;
-
- // Redo the analysis, but now also track which instructions last wrote
- // to any of the registers in RetRegsWithGadgets, so that better
- // diagnostics can be produced.
+void Analysis::computeDetailedInfo(BinaryFunction &BF,
+ MCPlusBuilder::AllocatorIdTy AllocatorId,
+ FunctionAnalysisResult &Result) {
+ BinaryContext &BC = BF.getBinaryContext();
+ // Collect the affected registers across all gadgets found in this function.
SmallSet<MCPhysReg, 4> RegsToTrack;
for (auto Report : Result.Diagnostics)
- for (MCPhysReg Reg : Report->getAffectedRegisters())
- RegsToTrack.insert(Reg);
-
+ RegsToTrack.insert_range(Report->getAffectedRegisters());
std::vector<MCPhysReg> RegsToTrackVec(RegsToTrack.begin(), RegsToTrack.end());
+ // Re-compute the analysis with register tracking.
PacRetAnalysis PRWIA(BF, AllocatorId, RegsToTrackVec);
PRWIA.run();
LLVM_DEBUG({
@@ -451,14 +451,13 @@ Analysis::computeDfState(BinaryFunction &BF,
BF.dump();
});
+ // Augment gadget reports.
for (auto Report : Result.Diagnostics) {
LLVM_DEBUG(
{ traceInst(BC, "Attaching clobbering info to", Report->Location); });
Report->setOverwritingInstrs(PRWIA.getLastClobberingInsts(
Report->Location, BF, Report->getAffectedRegisters()));
}
-
- return Result;
}
void Analysis::runOnFunction(BinaryFunction &BF,
@@ -472,10 +471,16 @@ void Analysis::runOnFunction(BinaryFunction &BF,
if (!BF.hasCFG())
return;
- FunctionAnalysisResult FAR = computeDfState(BF, AllocatorId);
+ FunctionAnalysisResult FAR = findGadgets(BF, AllocatorId);
if (FAR.Diagnostics.empty())
return;
+ // Redo the analysis, but now also track which instructions last wrote
+ // to any of the registers in RetRegsWithGadgets, so that better
+ // diagnostics can be produced.
+
+ computeDetailedInfo(BF, AllocatorId, FAR);
+
// `runOnFunction` is typically getting called from multiple threads in
// parallel. Therefore, use a lock to avoid data races when storing the
// result of the analysis in the `AnalysisResults` map.
@@ -540,7 +545,7 @@ void GadgetReport::generateReport(raw_ostream &OS,
<< " instructions that write to the affected registers after any "
"authentication are:\n";
// Sort by address to ensure output is deterministic.
- std::vector<MCInstReference> OI = OverwritingInstrs;
+ SmallVector<MCInstReference> OI = OverwritingInstrs;
llvm::sort(OI, [](const MCInstReference &A, const MCInstReference &B) {
return A.getAddress() < B.getAddress();
});
More information about the llvm-commits
mailing list