[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: clarify MCPlusBuilder callbacks interface (PR #136147)
Anatoly Trosinenko via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Apr 18 09:35:00 PDT 2025
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/136147
>From 27ce9e02da3aa11a9e699f9caca6e1fea0fff15d Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 17 Apr 2025 15:40:05 +0300
Subject: [PATCH] [BOLT] Gadget scanner: clarify MCPlusBuilder callbacks
interface
Clarify the semantics of `getAuthenticatedReg` and remove a redundant
`isAuthenticationOfReg` method, as combined auth+something instructions
(such as `retaa` on AArch64) should be handled carefully, especially
when searching for authentication oracles: usually, such instructions
cannot be authentication oracles and only some of them actually write
an authenticated pointer to a register (such as "ldra x0, [x1]!").
Use `std::optional<MCPhysReg>` returned type instead of plain MCPhysReg
and returning `getNoRegister()` as a "not applicable" indication.
Document a few existing methods, add information about preconditions.
---
bolt/include/bolt/Core/MCPlusBuilder.h | 61 ++++++++++-----
bolt/lib/Passes/PAuthGadgetScanner.cpp | 64 +++++++++-------
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 76 ++++++++-----------
.../AArch64/gs-pauth-debug-output.s | 3 -
.../AArch64/gs-pauth-signing-oracles.s | 20 +++++
5 files changed, 130 insertions(+), 94 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 132d58f3f9f79..83ad70ea97076 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -562,30 +562,50 @@ class MCPlusBuilder {
return {};
}
- virtual ErrorOr<MCPhysReg> getAuthenticatedReg(const MCInst &Inst) const {
- llvm_unreachable("not implemented");
- return getNoRegister();
- }
-
- virtual bool isAuthenticationOfReg(const MCInst &Inst,
- MCPhysReg AuthenticatedReg) const {
+ /// Returns the register where an authenticated pointer is written to by Inst,
+ /// or std::nullopt if not authenticating any register.
+ ///
+ /// Sets IsChecked if the instruction always checks authenticated pointer,
+ /// i.e. it either returns a successfully authenticated pointer or terminates
+ /// the program abnormally (such as "ldra x0, [x1]!" on AArch64, which crashes
+ /// on authentication failure even if FEAT_FPAC is not implemented).
+ virtual std::optional<MCPhysReg>
+ getWrittenAuthenticatedReg(const MCInst &Inst, bool &IsChecked) const {
llvm_unreachable("not implemented");
- return false;
+ return std::nullopt;
}
- virtual MCPhysReg getSignedReg(const MCInst &Inst) const {
+ /// Returns the register signed by Inst, or std::nullopt if not signing any
+ /// register.
+ ///
+ /// The returned register is assumed to be both input and output operand,
+ /// as it is done on AArch64.
+ virtual std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const {
llvm_unreachable("not implemented");
- return getNoRegister();
+ return std::nullopt;
}
- virtual ErrorOr<MCPhysReg> getRegUsedAsRetDest(const MCInst &Inst) const {
+ /// Returns the register used as a return address. Returns std::nullopt if
+ /// not applicable, such as reading the return address from a system register
+ /// or from the stack.
+ ///
+ /// Sets IsAuthenticatedInternally if the instruction accepts a signed
+ /// pointer as its operand and authenticates it internally.
+ ///
+ /// Should only be called when isReturn(Inst) is true.
+ virtual std::optional<MCPhysReg>
+ getRegUsedAsRetDest(const MCInst &Inst,
+ bool &IsAuthenticatedInternally) const {
llvm_unreachable("not implemented");
- return getNoRegister();
+ return std::nullopt;
}
/// Returns the register used as the destination of an indirect branch or call
/// instruction. Sets IsAuthenticatedInternally if the instruction accepts
/// a signed pointer as its operand and authenticates it internally.
+ ///
+ /// Should only be called if isIndirectCall(Inst) or isIndirectBranch(Inst)
+ /// returns true.
virtual MCPhysReg
getRegUsedAsIndirectBranchDest(const MCInst &Inst,
bool &IsAuthenticatedInternally) const {
@@ -602,14 +622,14 @@ class MCPlusBuilder {
/// controlled, under the Pointer Authentication threat model.
///
/// If the instruction does not write to any register satisfying the above
- /// two conditions, NoRegister is returned.
+ /// two conditions, std::nullopt is returned.
///
/// The Pointer Authentication threat model assumes an attacker is able to
/// modify any writable memory, but not executable code (due to W^X).
- virtual MCPhysReg
+ virtual std::optional<MCPhysReg>
getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
llvm_unreachable("not implemented");
- return getNoRegister();
+ return std::nullopt;
}
/// Analyzes if this instruction can safely perform address arithmetics
@@ -622,10 +642,13 @@ class MCPlusBuilder {
/// controlled, provided InReg and executable code are not. Please note that
/// registers other than InReg as well as the contents of memory which is
/// writable by the process should be considered attacker-controlled.
+ ///
+ /// The instruction should not write any values derived from InReg anywhere,
+ /// except for OutReg.
virtual std::optional<std::pair<MCPhysReg, MCPhysReg>>
analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const {
llvm_unreachable("not implemented");
- return std::make_pair(getNoRegister(), getNoRegister());
+ return std::nullopt;
}
/// Analyzes if a pointer is checked to be authenticated successfully
@@ -670,10 +693,10 @@ class MCPlusBuilder {
///
/// Use this function for simple, single-instruction patterns instead of
/// its getAuthCheckedReg(BB) counterpart.
- virtual MCPhysReg getAuthCheckedReg(const MCInst &Inst,
- bool MayOverwrite) const {
+ virtual std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,
+ bool MayOverwrite) const {
llvm_unreachable("not implemented");
- return getNoRegister();
+ return std::nullopt;
}
virtual bool isTerminator(const MCInst &Inst) const;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index b3081f034e8ee..2e4d8ff933a65 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -367,17 +367,15 @@ class SrcSafetyAnalysis {
SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
const SrcState &Cur) const {
SmallVector<MCPhysReg> Regs;
- const MCPhysReg NoReg = BC.MIB->getNoRegister();
// A signed pointer can be authenticated, or
- ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
- if (AutReg && *AutReg != NoReg)
+ bool Dummy = false;
+ if (auto AutReg = BC.MIB->getWrittenAuthenticatedReg(Point, Dummy))
Regs.push_back(*AutReg);
// ... a safe address can be materialized, or
- MCPhysReg NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point);
- if (NewAddrReg != NoReg)
- Regs.push_back(NewAddrReg);
+ if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point))
+ Regs.push_back(*NewAddrReg);
// ... an address can be updated in a safe manner, producing the result
// which is as trusted as the input address.
@@ -393,13 +391,20 @@ class SrcSafetyAnalysis {
SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
const SrcState &Cur) const {
SmallVector<MCPhysReg> Regs;
- const MCPhysReg NoReg = BC.MIB->getNoRegister();
// An authenticated pointer can be checked, or
- MCPhysReg CheckedReg =
+ std::optional<MCPhysReg> CheckedReg =
BC.MIB->getAuthCheckedReg(Point, /*MayOverwrite=*/false);
- if (CheckedReg != NoReg && Cur.SafeToDerefRegs[CheckedReg])
- Regs.push_back(CheckedReg);
+ if (CheckedReg && Cur.SafeToDerefRegs[*CheckedReg])
+ Regs.push_back(*CheckedReg);
+
+ // ... a pointer can be authenticated by an instruction that always checks
+ // the pointer, or
+ bool IsChecked = false;
+ std::optional<MCPhysReg> AutReg =
+ BC.MIB->getWrittenAuthenticatedReg(Point, IsChecked);
+ if (AutReg && IsChecked)
+ Regs.push_back(*AutReg);
if (CheckerSequenceInfo.contains(&Point)) {
MCPhysReg CheckedReg;
@@ -414,9 +419,8 @@ class SrcSafetyAnalysis {
}
// ... a safe address can be materialized, or
- MCPhysReg NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point);
- if (NewAddrReg != NoReg)
- Regs.push_back(NewAddrReg);
+ if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point))
+ Regs.push_back(*NewAddrReg);
// ... an address can be updated in a safe manner, producing the result
// which is as trusted as the input address.
@@ -733,25 +737,28 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
if (!BC.MIB->isReturn(Inst))
return std::nullopt;
- ErrorOr<MCPhysReg> MaybeRetReg = BC.MIB->getRegUsedAsRetDest(Inst);
- if (MaybeRetReg.getError()) {
+ bool IsAuthenticated = false;
+ std::optional<MCPhysReg> RetReg =
+ BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticated);
+ if (!RetReg) {
return make_generic_report(
Inst, "Warning: pac-ret analysis could not analyze this return "
"instruction");
}
- MCPhysReg RetReg = *MaybeRetReg;
+ if (IsAuthenticated)
+ return std::nullopt;
+
+ assert(*RetReg != BC.MIB->getNoRegister());
LLVM_DEBUG({
traceInst(BC, "Found RET inst", Inst);
- traceReg(BC, "RetReg", RetReg);
- traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
+ traceReg(BC, "RetReg", *RetReg);
+ traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
});
- if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
- return std::nullopt;
- LLVM_DEBUG({ traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs); });
- if (S.SafeToDerefRegs[RetReg])
+
+ if (S.SafeToDerefRegs[*RetReg])
return std::nullopt;
- return make_report(RetKind, Inst, RetReg);
+ return make_report(RetKind, Inst, *RetReg);
}
static std::optional<BriefReport<MCPhysReg>>
@@ -784,19 +791,20 @@ shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst,
const SrcState &S) {
static const GadgetKind SigningOracleKind("signing oracle found");
- MCPhysReg SignedReg = BC.MIB->getSignedReg(Inst);
- if (SignedReg == BC.MIB->getNoRegister())
+ std::optional<MCPhysReg> SignedReg = BC.MIB->getSignedReg(Inst);
+ if (!SignedReg)
return std::nullopt;
+ assert(*SignedReg != BC.MIB->getNoRegister());
LLVM_DEBUG({
traceInst(BC, "Found sign inst", Inst);
- traceReg(BC, "Signed reg", SignedReg);
+ traceReg(BC, "Signed reg", *SignedReg);
traceRegMask(BC, "TrustedRegs", S.TrustedRegs);
});
- if (S.TrustedRegs[SignedReg])
+ if (S.TrustedRegs[*SignedReg])
return std::nullopt;
- return make_report(SigningOracleKind, Inst, SignedReg);
+ return make_report(SigningOracleKind, Inst, *SignedReg);
}
template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6d01c97b8d8dd..5363d0cfdc5e7 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -196,7 +196,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return {AArch64::LR};
}
- ErrorOr<MCPhysReg> getAuthenticatedReg(const MCInst &Inst) const override {
+ std::optional<MCPhysReg>
+ getWrittenAuthenticatedReg(const MCInst &Inst,
+ bool &IsChecked) const override {
+ IsChecked = false;
switch (Inst.getOpcode()) {
case AArch64::AUTIAZ:
case AArch64::AUTIBZ:
@@ -206,33 +209,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::AUTIBSPPCi:
case AArch64::AUTIASPPCr:
case AArch64::AUTIBSPPCr:
- case AArch64::RETAA:
- case AArch64::RETAB:
- case AArch64::RETAASPPCi:
- case AArch64::RETABSPPCi:
- case AArch64::RETAASPPCr:
- case AArch64::RETABSPPCr:
return AArch64::LR;
-
case AArch64::AUTIA1716:
case AArch64::AUTIB1716:
case AArch64::AUTIA171615:
case AArch64::AUTIB171615:
return AArch64::X17;
-
- case AArch64::ERETAA:
- case AArch64::ERETAB:
- // The ERETA{A,B} instructions use either register ELR_EL1, ELR_EL2 or
- // ELR_EL3, depending on the current Exception Level at run-time.
- //
- // Furthermore, these registers are not modelled by LLVM as a regular
- // MCPhysReg.... So there is no way to indicate that through the current
- // API.
- //
- // Therefore, return an error to indicate that LLVM/BOLT cannot model
- // this.
- return make_error_code(std::errc::result_out_of_range);
-
case AArch64::AUTIA:
case AArch64::AUTIB:
case AArch64::AUTDA:
@@ -242,22 +224,18 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::AUTDZA:
case AArch64::AUTDZB:
return Inst.getOperand(0).getReg();
-
- // FIXME: BL?RA(A|B)Z? and LDRA(A|B) should probably be handled here too.
-
+ case AArch64::LDRAAwriteback:
+ case AArch64::LDRABwriteback:
+ // Note that LDRA(A|B)indexed are not listed here, as they do not write
+ // an authenticated pointer back to the register.
+ IsChecked = true;
+ return Inst.getOperand(2).getReg();
default:
- return getNoRegister();
+ return std::nullopt;
}
}
- bool isAuthenticationOfReg(const MCInst &Inst, MCPhysReg Reg) const override {
- if (Reg == getNoRegister())
- return false;
- ErrorOr<MCPhysReg> AuthenticatedReg = getAuthenticatedReg(Inst);
- return AuthenticatedReg.getError() ? false : *AuthenticatedReg == Reg;
- }
-
- MCPhysReg getSignedReg(const MCInst &Inst) const override {
+ std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::PACIA:
case AArch64::PACIB:
@@ -283,26 +261,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
case AArch64::PACIB171615:
return AArch64::X17;
default:
- return getNoRegister();
+ return std::nullopt;
}
}
- ErrorOr<MCPhysReg> getRegUsedAsRetDest(const MCInst &Inst) const override {
+ std::optional<MCPhysReg>
+ getRegUsedAsRetDest(const MCInst &Inst,
+ bool &IsAuthenticatedInternally) const override {
assert(isReturn(Inst));
switch (Inst.getOpcode()) {
case AArch64::RET:
+ IsAuthenticatedInternally = false;
return Inst.getOperand(0).getReg();
+
case AArch64::RETAA:
case AArch64::RETAB:
case AArch64::RETAASPPCi:
case AArch64::RETABSPPCi:
case AArch64::RETAASPPCr:
case AArch64::RETABSPPCr:
+ IsAuthenticatedInternally = true;
return AArch64::LR;
case AArch64::ERET:
case AArch64::ERETAA:
case AArch64::ERETAB:
- return make_error_code(std::errc::result_out_of_range);
+ // The ERET* instructions use either register ELR_EL1, ELR_EL2 or
+ // ELR_EL3, depending on the current Exception Level at run-time.
+ //
+ // Furthermore, these registers are not modelled by LLVM as a regular
+ // MCPhysReg, so there is no way to indicate that through the current API.
+ return std::nullopt;
default:
llvm_unreachable("Unhandled return instruction");
}
@@ -332,7 +320,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
- MCPhysReg
+ std::optional<MCPhysReg>
getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::ADR:
@@ -343,7 +331,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// this instruction), so the produced value is not attacker-controlled.
return Inst.getOperand(0).getReg();
default:
- return getNoRegister();
+ return std::nullopt;
}
}
@@ -483,8 +471,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
- MCPhysReg getAuthCheckedReg(const MCInst &Inst,
- bool MayOverwrite) const override {
+ std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,
+ bool MayOverwrite) const override {
// Cannot trivially reuse AArch64InstrInfo::getMemOperandWithOffsetWidth()
// method as it accepts an instance of MachineInstr, not MCInst.
const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
@@ -532,10 +520,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// the resulting address arbitrarily.
for (unsigned I = BaseRegIndex + 1, E = Desc.getNumOperands(); I < E; ++I)
if (Inst.getOperand(I).isReg())
- return getNoRegister();
+ return std::nullopt;
if (!MayOverwrite && ClobbersBaseRegExceptWriteback(BaseRegIndex))
- return getNoRegister();
+ return std::nullopt;
return Inst.getOperand(BaseRegIndex).getReg();
}
@@ -543,7 +531,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// Store instructions are not handled yet, as they are not important for
// pauthtest ABI. Though, they could be handled similar to loads, if needed.
- return getNoRegister();
+ return std::nullopt;
}
bool isADRP(const MCInst &Inst) const override {
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index 078a8aca72d9c..82494d834a15c 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -119,7 +119,6 @@ simple:
// PAUTH-NEXT: SafeToDerefRegs: W0 X0 W0_HI{{[ \t]*$}}
// CHECK-NEXT: Found RET inst: 00000000: ret # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: >
// CHECK-NEXT: RetReg: LR
-// CHECK-NEXT: Authenticated reg: (none)
// CHECK-NEXT: SafeToDerefRegs: LR W30 W30_HI{{[ \t]*$}}
.globl clobber
@@ -146,7 +145,6 @@ clobber:
// CHECK-EMPTY:
// CHECK-NEXT: Found RET inst: 00000000: ret # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: >
// CHECK-NEXT: RetReg: LR
-// CHECK-NEXT: Authenticated reg: (none)
// CHECK-NEXT: SafeToDerefRegs: W30_HI{{[ \t]*$}}
// CHECK-EMPTY:
// CHECK-NEXT: Running detailed src register safety analysis...
@@ -225,7 +223,6 @@ nocfg:
// PAUTH-NEXT: SafeToDerefRegs: LR W0 W30 X0 W0_HI W30_HI
// CHECK-NEXT: Found RET inst: 00000000: ret # Offset: 8 # CFGUnawareSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: >
// CHECK-NEXT: RetReg: LR
-// CHECK-NEXT: Authenticated reg: (none)
// CHECK-NEXT: SafeToDerefRegs:
// CHECK-EMPTY:
// CHECK-NEXT: Running detailed src register safety analysis...
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
index 10fcf08cf158c..9b10879094a03 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
@@ -985,6 +985,26 @@ inst_pacnbibsppc:
ret
.size inst_pacnbibsppc, .-inst_pacnbibsppc
+// Test that write-back forms of LDRA(A|B) instructions are handled properly.
+
+ .globl inst_ldraa_wb
+ .type inst_ldraa_wb, at function
+inst_ldraa_wb:
+// CHECK-NOT: inst_ldraa_wb
+ ldraa x2, [x0]!
+ pacda x0, x1
+ ret
+ .size inst_ldraa_wb, .-inst_ldraa_wb
+
+ .globl inst_ldrab_wb
+ .type inst_ldrab_wb, at function
+inst_ldrab_wb:
+// CHECK-NOT: inst_ldrab_wb
+ ldraa x2, [x0]!
+ pacda x0, x1
+ ret
+ .size inst_ldrab_wb, .-inst_ldrab_wb
+
.globl main
.type main, at function
main:
More information about the llvm-branch-commits
mailing list