[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
Anatoly Trosinenko via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 26 09:42:06 PDT 2025
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540
>From 53f6310e26cb02a18d99a9350ff8162ea0ed22b6 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH] [BOLT] Gadget scanner: Detect address materialization and
arithmetics
In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
bolt/include/bolt/Core/MCPlusBuilder.h | 16 ++
bolt/lib/Passes/PAuthGadgetScanner.cpp | 92 +++++--
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 30 +++
.../AArch64/gs-pacret-autiasp.s | 15 --
.../gs-pauth-address-materialization.s | 228 ++++++++++++++++++
.../binary-analysis/AArch64/lit.local.cfg | 3 +-
6 files changed, 345 insertions(+), 39 deletions(-)
create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 8b6dc14121480..e94f82d00349a 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
return getNoRegister();
}
+ virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return getNoRegister();
+ }
+
+ /// Analyzes if this instruction can safely perform address arithmetics.
+ ///
+ /// If the first element of the returned pair is no-register, this instruction
+ /// is considered unknown. Otherwise, (output, input) pair is returned,
+ /// so that output is as trusted as input is.
+ virtual std::pair<MCPhysReg, MCPhysReg>
+ analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return std::make_pair(getNoRegister(), getNoRegister());
+ }
+
virtual bool isTerminator(const MCInst &Inst) const;
virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
});
}
+ BitVector getClobberedRegs(const MCInst &Point) const {
+ BitVector Clobbered(NumRegs, false);
+ // Assume a call can clobber all registers, including callee-saved
+ // registers. There's a good chance that callee-saved registers will be
+ // saved on the stack at some point during execution of the callee.
+ // Therefore they should also be considered as potentially modified by an
+ // attacker/written to.
+ // Also, not all functions may respect the AAPCS ABI rules about
+ // caller/callee-saved registers.
+ if (BC.MIB->isCall(Point))
+ Clobbered.set();
+ else
+ BC.MIB->getClobberedRegs(Point, Clobbered);
+ return Clobbered;
+ }
+
+ // Returns all registers that can be treated as if they are written by an
+ // authentication instruction.
+ SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+ const State &Cur) const {
+ SmallVector<MCPhysReg> Regs;
+ const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+ // A signed pointer can be authenticated, or
+ ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+ if (AutReg && *AutReg != NoReg)
+ Regs.push_back(*AutReg);
+
+ // ... a safe address can be materialized, or
+ MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+ if (NewAddrReg != NoReg)
+ Regs.push_back(NewAddrReg);
+
+ // ... an address can be updated in a safe manner, producing the result
+ // which is as trusted as the input address.
+ MCPhysReg ArithResult, ArithSrc;
+ std::tie(ArithResult, ArithSrc) =
+ BC.MIB->analyzeSafeAddressArithmetics(Point);
+ if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+ Regs.push_back(ArithResult);
+
+ return Regs;
+ }
+
State computeNext(const MCInst &Point, const State &Cur) {
PacStatePrinter P(BC);
LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
return State();
}
+ // First, compute various properties of the instruction, taking the state
+ // before its execution into account, if necessary.
+
+ BitVector Clobbered = getClobberedRegs(Point);
+ // Compute the set of registers that can be considered as written by
+ // an authentication instruction. This includes operations that are
+ // *strictly better* than authentication, such as materializing a
+ // PC-relative constant.
+ SmallVector<MCPhysReg> AuthenticatedOrBetter =
+ getAuthenticatedRegs(Point, Cur);
+
+ // Then, compute the state after this instruction is executed.
State Next = Cur;
- BitVector Clobbered(NumRegs, false);
- // Assume a call can clobber all registers, including callee-saved
- // registers. There's a good chance that callee-saved registers will be
- // saved on the stack at some point during execution of the callee.
- // Therefore they should also be considered as potentially modified by an
- // attacker/written to.
- // Also, not all functions may respect the AAPCS ABI rules about
- // caller/callee-saved registers.
- if (BC.MIB->isCall(Point))
- Clobbered.set();
- else
- BC.MIB->getClobberedRegs(Point, Clobbered);
+
Next.SafeToDerefRegs.reset(Clobbered);
// Keep track of this instruction if it writes to any of the registers we
// need to track that for:
@@ -375,17 +420,18 @@ class PacRetAnalysis
if (Clobbered[Reg])
lastWritingInsts(Next, Reg) = {&Point};
- ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
- if (AutReg && *AutReg != BC.MIB->getNoRegister()) {
- // The sub-registers of *AutReg are also trusted now, but not its
- // super-registers (as they retain untrusted register units).
- BitVector AuthenticatedSubregs =
- BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true);
- for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
- Next.SafeToDerefRegs.set(Reg);
- if (RegsToTrackInstsFor.isTracked(Reg))
- lastWritingInsts(Next, Reg).clear();
- }
+ // After accounting for clobbered registers in general, override the state
+ // according to authentication and other *special cases* of clobbering.
+
+ // The sub-registers of each authenticated register are also trusted now,
+ // but not their super-registers (as they retain untrusted register units).
+ BitVector AuthenticatedSubregs(NumRegs);
+ for (MCPhysReg AutReg : AuthenticatedOrBetter)
+ AuthenticatedSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true);
+ for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
+ Next.SafeToDerefRegs.set(Reg);
+ if (RegsToTrackInstsFor.isTracked(Reg))
+ lastWritingInsts(Next, Reg).clear();
}
LLVM_DEBUG({
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 9b01b39251c29..8cc03fb694c07 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -304,6 +304,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
+ MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override {
+ switch (Inst.getOpcode()) {
+ case AArch64::ADR:
+ case AArch64::ADRP:
+ return Inst.getOperand(0).getReg();
+ default:
+ return getNoRegister();
+ }
+ }
+
+ std::pair<MCPhysReg, MCPhysReg>
+ analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
+ switch (Inst.getOpcode()) {
+ default:
+ return std::make_pair(getNoRegister(), getNoRegister());
+ case AArch64::ADDXri:
+ case AArch64::SUBXri:
+ return std::make_pair(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg());
+ case AArch64::ORRXrs:
+ // "mov Xd, Xm" is equivalent to "orr Xd, XZR, Xm, lsl #0"
+ if (Inst.getOperand(1).getReg() != AArch64::XZR ||
+ Inst.getOperand(3).getImm() != 0)
+ return std::make_pair(getNoRegister(), getNoRegister());
+
+ return std::make_pair(Inst.getOperand(0).getReg(),
+ Inst.getOperand(2).getReg());
+ }
+ }
+
bool isADRP(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::ADRP;
}
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 01b7cec3272e6..d506ec13f4895 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -141,24 +141,9 @@ f_nonx30_ret_ok:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl g
- add x0, x0, #3
ldp x29, x30, [sp], #16
- // FIXME: Should the scanner understand that an authenticated register (below x30,
- // after the autiasp instruction), is OK to be moved to another register
- // and then that register being used to return?
- // This respects that pac-ret hardening intent, but the scanner currently
- // will produce a false positive for this.
- // Is it worthwhile to make the scanner more complex for this case?
- // So far, scanning many millions of instructions across a linux distro,
- // I haven't encountered such an example.
- // The ".if 0" block below tests this case and currently fails.
-.if 0
autiasp
mov x16, x30
-.else
- mov x16, x30
- autia x16, sp
-.endif
// CHECK-NOT: function f_nonx30_ret_ok
ret x16
.size f_nonx30_ret_ok, .-f_nonx30_ret_ok
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
new file mode 100644
index 0000000000000..cdb26aff505ed
--- /dev/null
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+// if a register is considered safe when detection of signing oracles is
+// implemented, as it is more traditional usage of PC-relative constants.
+// Moreover, using PAC instructions would improve test robustness, as
+// handling of *calls* can be influenced by what BOLT classifies as a
+// tail call, for example.
+
+ .text
+
+// Define a function that is reachable by ADR instruction.
+ .type sym, at function
+sym:
+ ret
+ .size sym, .-sym
+
+ .globl good_adr
+ .type good_adr, at function
+good_adr:
+// CHECK-NOT: good_adr
+ adr x0, sym
+ br x0
+ .size good_adr, .-good_adr
+
+ .globl good_adrp
+ .type good_adrp, at function
+good_adrp:
+// CHECK-NOT: good_adrp
+ adrp x0, sym
+ br x0
+ .size good_adrp, .-good_adrp
+
+ .globl good_adrp_add
+ .type good_adrp_add, at function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_add, .-good_adrp_add
+
+ .globl good_adrp_add_with_const_offset
+ .type good_adrp_add_with_const_offset, at function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ add x0, x0, #8
+ br x0
+ .size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset
+
+ .globl bad_adrp_with_nonconst_offset
+ .type bad_adrp_with_nonconst_offset, at function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x0, x1
+ br x0
+ .size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+ .globl bad_split_adrp
+ .type bad_split_adrp, at function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+ cbz x2, 1f
+ adrp x0, sym
+1:
+ add x0, x0, :lo12:sym
+ br x0
+ .size bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+ .globl bad_immediate_constant
+ .type bad_immediate_constant, at function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ movz x0, #1234
+ br x0
+ .size bad_immediate_constant, .-bad_immediate_constant
+
+// Any ADR or ADRP instruction followed by any number of increments/decrements
+// by constant is considered safe.
+
+ .globl good_adr_with_add
+ .type good_adr_with_add, at function
+good_adr_with_add:
+// CHECK-NOT: good_adr_with_add
+ adr x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adr_with_add, .-good_adr_with_add
+
+ .globl good_adrp_with_add_non_consecutive
+ .type good_adrp_with_add_non_consecutive, at function
+good_adrp_with_add_non_consecutive:
+// CHECK-NOT: good_adrp_with_add_non_consecutive
+ adrp x0, sym
+ mul x1, x2, x3
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_with_add_non_consecutive, .-good_adrp_with_add_non_consecutive
+
+ .globl good_many_offsets
+ .type good_many_offsets, at function
+good_many_offsets:
+// CHECK-NOT: good_many_offsets
+ adrp x0, sym
+ add x1, x0, #8
+ add x2, x1, :lo12:sym
+ br x2
+ .size good_many_offsets, .-good_many_offsets
+
+// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of
+// support for address arithmetics, but ORR in general is not.
+
+ .globl good_mov_reg
+ .type good_mov_reg, at function
+good_mov_reg:
+// CHECK-NOT: good_mov_reg
+ adrp x0, sym
+ mov x1, x0
+ orr x2, xzr, x1 // the same as "mov x2, x1"
+ br x2
+ .size good_mov_reg, .-good_mov_reg
+
+ .globl bad_orr_not_xzr
+ .type bad_orr_not_xzr, at function
+bad_orr_not_xzr:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_xzr, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x1, #0
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ movz x1, #0
+ orr x2, x1, x0
+ br x2
+ .size bad_orr_not_xzr, .-bad_orr_not_xzr
+
+ .globl bad_orr_not_lsl0
+ .type bad_orr_not_lsl0, at function
+bad_orr_not_lsl0:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_lsl0, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ orr x2, xzr, x0, lsl #1
+ br x2
+ .size bad_orr_not_lsl0, .-bad_orr_not_lsl0
+
+// Check that the input register operands of `add`/`mov` is correct.
+
+ .globl bad_add_input_reg
+ .type bad_add_input_reg, at function
+bad_add_input_reg:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_add_input_reg, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x1, :lo12:sym
+ br x0
+ .size bad_add_input_reg, .-bad_add_input_reg
+
+ .globl bad_mov_input_reg
+ .type bad_mov_input_reg, at function
+bad_mov_input_reg:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_mov_input_reg, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ mov x0, x1
+ br x0
+ .size bad_mov_input_reg, .-bad_mov_input_reg
+
+ .globl main
+ .type main, at function
+main:
+ mov x0, 0
+ ret
+ .size main, .-main
diff --git a/bolt/test/binary-analysis/AArch64/lit.local.cfg b/bolt/test/binary-analysis/AArch64/lit.local.cfg
index 6ac7d3cc0fec7..54e093566dea9 100644
--- a/bolt/test/binary-analysis/AArch64/lit.local.cfg
+++ b/bolt/test/binary-analysis/AArch64/lit.local.cfg
@@ -1,7 +1,8 @@
if "AArch64" not in config.root.targets:
config.unsupported = True
-flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding"
+# -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
+flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding -Wl,--no-relax"
config.substitutions.insert(0, ("%cflags", f"%cflags {flags}"))
config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}"))
More information about the llvm-branch-commits
mailing list