[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: clarify MCPlusBuilder callbacks interface (PR #136147)

Anatoly Trosinenko via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Apr 18 11:43:45 PDT 2025


https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/136147

>From e86dd8f18b4f18bbc5876bc7e4640c34b04c877f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 17 Apr 2025 15:40:05 +0300
Subject: [PATCH] [BOLT] Gadget scanner: clarify MCPlusBuilder callbacks
 interface

Clarify the semantics of `getAuthenticatedReg` and remove a redundant
`isAuthenticationOfReg` method, as combined auth+something instructions
(such as `retaa` on AArch64) should be handled carefully, especially
when searching for authentication oracles: usually, such instructions
cannot be authentication oracles and only some of them actually write
an authenticated pointer to a register (such as "ldra x0, [x1]!").

Use `std::optional<MCPhysReg>` returned type instead of plain MCPhysReg
and returning `getNoRegister()` as a "not applicable" indication.

Document a few existing methods, add information about preconditions.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        | 61 ++++++++++-----
 bolt/lib/Passes/PAuthGadgetScanner.cpp        | 64 +++++++++-------
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 76 ++++++++-----------
 .../AArch64/gs-pauth-debug-output.s           |  3 -
 .../AArch64/gs-pauth-signing-oracles.s        | 20 +++++
 5 files changed, 130 insertions(+), 94 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 132d58f3f9f79..83ad70ea97076 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -562,30 +562,50 @@ class MCPlusBuilder {
     return {};
   }
 
-  virtual ErrorOr<MCPhysReg> getAuthenticatedReg(const MCInst &Inst) const {
-    llvm_unreachable("not implemented");
-    return getNoRegister();
-  }
-
-  virtual bool isAuthenticationOfReg(const MCInst &Inst,
-                                     MCPhysReg AuthenticatedReg) const {
+  /// Returns the register where an authenticated pointer is written to by Inst,
+  /// or std::nullopt if not authenticating any register.
+  ///
+  /// Sets IsChecked if the instruction always checks authenticated pointer,
+  /// i.e. it either returns a successfully authenticated pointer or terminates
+  /// the program abnormally (such as "ldra x0, [x1]!" on AArch64, which crashes
+  /// on authentication failure even if FEAT_FPAC is not implemented).
+  virtual std::optional<MCPhysReg>
+  getWrittenAuthenticatedReg(const MCInst &Inst, bool &IsChecked) const {
     llvm_unreachable("not implemented");
-    return false;
+    return std::nullopt;
   }
 
-  virtual MCPhysReg getSignedReg(const MCInst &Inst) const {
+  /// Returns the register signed by Inst, or std::nullopt if not signing any
+  /// register.
+  ///
+  /// The returned register is assumed to be both input and output operand,
+  /// as it is done on AArch64.
+  virtual std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
-    return getNoRegister();
+    return std::nullopt;
   }
 
-  virtual ErrorOr<MCPhysReg> getRegUsedAsRetDest(const MCInst &Inst) const {
+  /// Returns the register used as a return address. Returns std::nullopt if
+  /// not applicable, such as reading the return address from a system register
+  /// or from the stack.
+  ///
+  /// Sets IsAuthenticatedInternally if the instruction accepts a signed
+  /// pointer as its operand and authenticates it internally.
+  ///
+  /// Should only be called when isReturn(Inst) is true.
+  virtual std::optional<MCPhysReg>
+  getRegUsedAsRetDest(const MCInst &Inst,
+                      bool &IsAuthenticatedInternally) const {
     llvm_unreachable("not implemented");
-    return getNoRegister();
+    return std::nullopt;
   }
 
   /// Returns the register used as the destination of an indirect branch or call
   /// instruction. Sets IsAuthenticatedInternally if the instruction accepts
   /// a signed pointer as its operand and authenticates it internally.
+  ///
+  /// Should only be called if isIndirectCall(Inst) or isIndirectBranch(Inst)
+  /// returns true.
   virtual MCPhysReg
   getRegUsedAsIndirectBranchDest(const MCInst &Inst,
                                  bool &IsAuthenticatedInternally) const {
@@ -602,14 +622,14 @@ class MCPlusBuilder {
   ///    controlled, under the Pointer Authentication threat model.
   ///
   /// If the instruction does not write to any register satisfying the above
-  /// two conditions, NoRegister is returned.
+  /// two conditions, std::nullopt is returned.
   ///
   /// The Pointer Authentication threat model assumes an attacker is able to
   /// modify any writable memory, but not executable code (due to W^X).
-  virtual MCPhysReg
+  virtual std::optional<MCPhysReg>
   getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
-    return getNoRegister();
+    return std::nullopt;
   }
 
   /// Analyzes if this instruction can safely perform address arithmetics
@@ -622,10 +642,13 @@ class MCPlusBuilder {
   /// controlled, provided InReg and executable code are not. Please note that
   /// registers other than InReg as well as the contents of memory which is
   /// writable by the process should be considered attacker-controlled.
+  ///
+  /// The instruction should not write any values derived from InReg anywhere,
+  /// except for OutReg.
   virtual std::optional<std::pair<MCPhysReg, MCPhysReg>>
   analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const {
     llvm_unreachable("not implemented");
-    return std::make_pair(getNoRegister(), getNoRegister());
+    return std::nullopt;
   }
 
   /// Analyzes if a pointer is checked to be authenticated successfully
@@ -670,10 +693,10 @@ class MCPlusBuilder {
   ///
   /// Use this function for simple, single-instruction patterns instead of
   /// its getAuthCheckedReg(BB) counterpart.
-  virtual MCPhysReg getAuthCheckedReg(const MCInst &Inst,
-                                      bool MayOverwrite) const {
+  virtual std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,
+                                                     bool MayOverwrite) const {
     llvm_unreachable("not implemented");
-    return getNoRegister();
+    return std::nullopt;
   }
 
   virtual bool isTerminator(const MCInst &Inst) const;
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index e8ef4b6e3f663..ee5e4f0ed2cd9 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -367,17 +367,15 @@ class SrcSafetyAnalysis {
   SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
                                                 const SrcState &Cur) const {
     SmallVector<MCPhysReg> Regs;
-    const MCPhysReg NoReg = BC.MIB->getNoRegister();
 
     // A signed pointer can be authenticated, or
-    ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
-    if (AutReg && *AutReg != NoReg)
+    bool Dummy = false;
+    if (auto AutReg = BC.MIB->getWrittenAuthenticatedReg(Point, Dummy))
       Regs.push_back(*AutReg);
 
     // ... a safe address can be materialized, or
-    MCPhysReg NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point);
-    if (NewAddrReg != NoReg)
-      Regs.push_back(NewAddrReg);
+    if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point))
+      Regs.push_back(*NewAddrReg);
 
     // ... an address can be updated in a safe manner, producing the result
     // which is as trusted as the input address.
@@ -393,13 +391,20 @@ class SrcSafetyAnalysis {
   SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point,
                                             const SrcState &Cur) const {
     SmallVector<MCPhysReg> Regs;
-    const MCPhysReg NoReg = BC.MIB->getNoRegister();
 
     // An authenticated pointer can be checked, or
-    MCPhysReg CheckedReg =
+    std::optional<MCPhysReg> CheckedReg =
         BC.MIB->getAuthCheckedReg(Point, /*MayOverwrite=*/false);
-    if (CheckedReg != NoReg && Cur.SafeToDerefRegs[CheckedReg])
-      Regs.push_back(CheckedReg);
+    if (CheckedReg && Cur.SafeToDerefRegs[*CheckedReg])
+      Regs.push_back(*CheckedReg);
+
+    // ... a pointer can be authenticated by an instruction that always checks
+    // the pointer, or
+    bool IsChecked = false;
+    std::optional<MCPhysReg> AutReg =
+        BC.MIB->getWrittenAuthenticatedReg(Point, IsChecked);
+    if (AutReg && IsChecked)
+      Regs.push_back(*AutReg);
 
     if (CheckerSequenceInfo.contains(&Point)) {
       MCPhysReg CheckedReg;
@@ -414,9 +419,8 @@ class SrcSafetyAnalysis {
     }
 
     // ... a safe address can be materialized, or
-    MCPhysReg NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point);
-    if (NewAddrReg != NoReg)
-      Regs.push_back(NewAddrReg);
+    if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point))
+      Regs.push_back(*NewAddrReg);
 
     // ... an address can be updated in a safe manner, producing the result
     // which is as trusted as the input address.
@@ -731,25 +735,28 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
   if (!BC.MIB->isReturn(Inst))
     return std::nullopt;
 
-  ErrorOr<MCPhysReg> MaybeRetReg = BC.MIB->getRegUsedAsRetDest(Inst);
-  if (MaybeRetReg.getError()) {
+  bool IsAuthenticated = false;
+  std::optional<MCPhysReg> RetReg =
+      BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticated);
+  if (!RetReg) {
     return make_generic_report(
         Inst, "Warning: pac-ret analysis could not analyze this return "
               "instruction");
   }
-  MCPhysReg RetReg = *MaybeRetReg;
+  if (IsAuthenticated)
+    return std::nullopt;
+
+  assert(*RetReg != BC.MIB->getNoRegister());
   LLVM_DEBUG({
     traceInst(BC, "Found RET inst", Inst);
-    traceReg(BC, "RetReg", RetReg);
-    traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst));
+    traceReg(BC, "RetReg", *RetReg);
+    traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs);
   });
-  if (BC.MIB->isAuthenticationOfReg(Inst, RetReg))
-    return std::nullopt;
-  LLVM_DEBUG({ traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs); });
-  if (S.SafeToDerefRegs[RetReg])
+
+  if (S.SafeToDerefRegs[*RetReg])
     return std::nullopt;
 
-  return make_report(RetKind, Inst, RetReg);
+  return make_report(RetKind, Inst, *RetReg);
 }
 
 static std::optional<BriefReport<MCPhysReg>>
@@ -782,19 +789,20 @@ shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst,
                           const SrcState &S) {
   static const GadgetKind SigningOracleKind("signing oracle found");
 
-  MCPhysReg SignedReg = BC.MIB->getSignedReg(Inst);
-  if (SignedReg == BC.MIB->getNoRegister())
+  std::optional<MCPhysReg> SignedReg = BC.MIB->getSignedReg(Inst);
+  if (!SignedReg)
     return std::nullopt;
 
+  assert(*SignedReg != BC.MIB->getNoRegister());
   LLVM_DEBUG({
     traceInst(BC, "Found sign inst", Inst);
-    traceReg(BC, "Signed reg", SignedReg);
+    traceReg(BC, "Signed reg", *SignedReg);
     traceRegMask(BC, "TrustedRegs", S.TrustedRegs);
   });
-  if (S.TrustedRegs[SignedReg])
+  if (S.TrustedRegs[*SignedReg])
     return std::nullopt;
 
-  return make_report(SigningOracleKind, Inst, SignedReg);
+  return make_report(SigningOracleKind, Inst, *SignedReg);
 }
 
 template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6d01c97b8d8dd..5363d0cfdc5e7 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -196,7 +196,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return {AArch64::LR};
   }
 
-  ErrorOr<MCPhysReg> getAuthenticatedReg(const MCInst &Inst) const override {
+  std::optional<MCPhysReg>
+  getWrittenAuthenticatedReg(const MCInst &Inst,
+                             bool &IsChecked) const override {
+    IsChecked = false;
     switch (Inst.getOpcode()) {
     case AArch64::AUTIAZ:
     case AArch64::AUTIBZ:
@@ -206,33 +209,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::AUTIBSPPCi:
     case AArch64::AUTIASPPCr:
     case AArch64::AUTIBSPPCr:
-    case AArch64::RETAA:
-    case AArch64::RETAB:
-    case AArch64::RETAASPPCi:
-    case AArch64::RETABSPPCi:
-    case AArch64::RETAASPPCr:
-    case AArch64::RETABSPPCr:
       return AArch64::LR;
-
     case AArch64::AUTIA1716:
     case AArch64::AUTIB1716:
     case AArch64::AUTIA171615:
     case AArch64::AUTIB171615:
       return AArch64::X17;
-
-    case AArch64::ERETAA:
-    case AArch64::ERETAB:
-      // The ERETA{A,B} instructions use either register ELR_EL1, ELR_EL2 or
-      // ELR_EL3, depending on the current Exception Level at run-time.
-      //
-      // Furthermore, these registers are not modelled by LLVM as a regular
-      // MCPhysReg.... So there is no way to indicate that through the current
-      // API.
-      //
-      // Therefore, return an error to indicate that LLVM/BOLT cannot model
-      // this.
-      return make_error_code(std::errc::result_out_of_range);
-
     case AArch64::AUTIA:
     case AArch64::AUTIB:
     case AArch64::AUTDA:
@@ -242,22 +224,18 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::AUTDZA:
     case AArch64::AUTDZB:
       return Inst.getOperand(0).getReg();
-
-      // FIXME: BL?RA(A|B)Z? and LDRA(A|B) should probably be handled here too.
-
+    case AArch64::LDRAAwriteback:
+    case AArch64::LDRABwriteback:
+      // Note that LDRA(A|B)indexed are not listed here, as they do not write
+      // an authenticated pointer back to the register.
+      IsChecked = true;
+      return Inst.getOperand(2).getReg();
     default:
-      return getNoRegister();
+      return std::nullopt;
     }
   }
 
-  bool isAuthenticationOfReg(const MCInst &Inst, MCPhysReg Reg) const override {
-    if (Reg == getNoRegister())
-      return false;
-    ErrorOr<MCPhysReg> AuthenticatedReg = getAuthenticatedReg(Inst);
-    return AuthenticatedReg.getError() ? false : *AuthenticatedReg == Reg;
-  }
-
-  MCPhysReg getSignedReg(const MCInst &Inst) const override {
+  std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
     switch (Inst.getOpcode()) {
     case AArch64::PACIA:
     case AArch64::PACIB:
@@ -283,26 +261,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     case AArch64::PACIB171615:
       return AArch64::X17;
     default:
-      return getNoRegister();
+      return std::nullopt;
     }
   }
 
-  ErrorOr<MCPhysReg> getRegUsedAsRetDest(const MCInst &Inst) const override {
+  std::optional<MCPhysReg>
+  getRegUsedAsRetDest(const MCInst &Inst,
+                      bool &IsAuthenticatedInternally) const override {
     assert(isReturn(Inst));
     switch (Inst.getOpcode()) {
     case AArch64::RET:
+      IsAuthenticatedInternally = false;
       return Inst.getOperand(0).getReg();
+
     case AArch64::RETAA:
     case AArch64::RETAB:
     case AArch64::RETAASPPCi:
     case AArch64::RETABSPPCi:
     case AArch64::RETAASPPCr:
     case AArch64::RETABSPPCr:
+      IsAuthenticatedInternally = true;
       return AArch64::LR;
     case AArch64::ERET:
     case AArch64::ERETAA:
     case AArch64::ERETAB:
-      return make_error_code(std::errc::result_out_of_range);
+      // The ERET* instructions use either register ELR_EL1, ELR_EL2 or
+      // ELR_EL3, depending on the current Exception Level at run-time.
+      //
+      // Furthermore, these registers are not modelled by LLVM as a regular
+      // MCPhysReg, so there is no way to indicate that through the current API.
+      return std::nullopt;
     default:
       llvm_unreachable("Unhandled return instruction");
     }
@@ -332,7 +320,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
-  MCPhysReg
+  std::optional<MCPhysReg>
   getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
     switch (Inst.getOpcode()) {
     case AArch64::ADR:
@@ -343,7 +331,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       // this instruction), so the produced value is not attacker-controlled.
       return Inst.getOperand(0).getReg();
     default:
-      return getNoRegister();
+      return std::nullopt;
     }
   }
 
@@ -483,8 +471,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     }
   }
 
-  MCPhysReg getAuthCheckedReg(const MCInst &Inst,
-                              bool MayOverwrite) const override {
+  std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,
+                                             bool MayOverwrite) const override {
     // Cannot trivially reuse AArch64InstrInfo::getMemOperandWithOffsetWidth()
     // method as it accepts an instance of MachineInstr, not MCInst.
     const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
@@ -532,10 +520,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       // the resulting address arbitrarily.
       for (unsigned I = BaseRegIndex + 1, E = Desc.getNumOperands(); I < E; ++I)
         if (Inst.getOperand(I).isReg())
-          return getNoRegister();
+          return std::nullopt;
 
       if (!MayOverwrite && ClobbersBaseRegExceptWriteback(BaseRegIndex))
-        return getNoRegister();
+        return std::nullopt;
 
       return Inst.getOperand(BaseRegIndex).getReg();
     }
@@ -543,7 +531,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     // Store instructions are not handled yet, as they are not important for
     // pauthtest ABI. Though, they could be handled similar to loads, if needed.
 
-    return getNoRegister();
+    return std::nullopt;
   }
 
   bool isADRP(const MCInst &Inst) const override {
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
index 078a8aca72d9c..82494d834a15c 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s
@@ -119,7 +119,6 @@ simple:
 // PAUTH-NEXT:     SafeToDerefRegs: W0 X0 W0_HI{{[ \t]*$}}
 // CHECK-NEXT:   Found RET inst:     00000000:         ret # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: >
 // CHECK-NEXT:     RetReg: LR
-// CHECK-NEXT:     Authenticated reg: (none)
 // CHECK-NEXT:     SafeToDerefRegs: LR W30 W30_HI{{[ \t]*$}}
 
         .globl  clobber
@@ -146,7 +145,6 @@ clobber:
 // CHECK-EMPTY:
 // CHECK-NEXT:   Found RET inst:     00000000:         ret # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: >
 // CHECK-NEXT:     RetReg: LR
-// CHECK-NEXT:     Authenticated reg: (none)
 // CHECK-NEXT:     SafeToDerefRegs: W30_HI{{[ \t]*$}}
 // CHECK-EMPTY:
 // CHECK-NEXT: Running detailed src register safety analysis...
@@ -225,7 +223,6 @@ nocfg:
 // PAUTH-NEXT:     SafeToDerefRegs: LR W0 W30 X0 W0_HI W30_HI
 // CHECK-NEXT:   Found RET inst:     00000000:         ret # Offset: 8 # CFGUnawareSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: >
 // CHECK-NEXT:     RetReg: LR
-// CHECK-NEXT:     Authenticated reg: (none)
 // CHECK-NEXT:     SafeToDerefRegs:
 // CHECK-EMPTY:
 // CHECK-NEXT: Running detailed src register safety analysis...
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
index 10fcf08cf158c..9b10879094a03 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-signing-oracles.s
@@ -985,6 +985,26 @@ inst_pacnbibsppc:
         ret
         .size inst_pacnbibsppc, .-inst_pacnbibsppc
 
+// Test that write-back forms of LDRA(A|B) instructions are handled properly.
+
+        .globl  inst_ldraa_wb
+        .type   inst_ldraa_wb, at function
+inst_ldraa_wb:
+// CHECK-NOT: inst_ldraa_wb
+        ldraa   x2, [x0]!
+        pacda   x0, x1
+        ret
+        .size inst_ldraa_wb, .-inst_ldraa_wb
+
+        .globl  inst_ldrab_wb
+        .type   inst_ldrab_wb, at function
+inst_ldrab_wb:
+// CHECK-NOT: inst_ldrab_wb
+        ldraa   x2, [x0]!
+        pacda   x0, x1
+        ret
+        .size inst_ldrab_wb, .-inst_ldrab_wb
+
         .globl  main
         .type   main, at function
 main:



More information about the llvm-branch-commits mailing list