[llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
Anatoly Trosinenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 09:27:21 PDT 2025
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/132540
>From 70aa35eca5db3d9fb90a6028406c0a453438891f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 20 Mar 2025 20:15:07 +0300
Subject: [PATCH 1/7] [BOLT] Gadget scanner: Detect address materialization and
arithmetics
In addition to authenticated pointers, consider the contents of a
register safe if it was
* written by PC-relative address computation
* updated by an arithmetic instruction whose input address is safe
---
bolt/include/bolt/Core/MCPlusBuilder.h | 16 ++
bolt/lib/Passes/PAuthGadgetScanner.cpp | 92 +++++--
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 30 +++
.../AArch64/gs-pacret-autiasp.s | 15 --
.../gs-pauth-address-materialization.s | 228 ++++++++++++++++++
.../binary-analysis/AArch64/lit.local.cfg | 3 +-
6 files changed, 345 insertions(+), 39 deletions(-)
create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index bbef65700b2a5..e59a9145490dd 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,6 +587,22 @@ class MCPlusBuilder {
return getNoRegister();
}
+ virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return getNoRegister();
+ }
+
+ /// Analyzes if this instruction can safely perform address arithmetics.
+ ///
+ /// If the first element of the returned pair is no-register, this instruction
+ /// is considered unknown. Otherwise, (output, input) pair is returned,
+ /// so that output is as trusted as input is.
+ virtual std::pair<MCPhysReg, MCPhysReg>
+ analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+ llvm_unreachable("not implemented");
+ return std::make_pair(getNoRegister(), getNoRegister());
+ }
+
virtual bool isTerminator(const MCInst &Inst) const;
virtual bool isNoop(const MCInst &Inst) const {
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index a3b320c545734..16da08551a34d 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -335,6 +335,50 @@ class PacRetAnalysis
});
}
+ BitVector getClobberedRegs(const MCInst &Point) const {
+ BitVector Clobbered(NumRegs, false);
+ // Assume a call can clobber all registers, including callee-saved
+ // registers. There's a good chance that callee-saved registers will be
+ // saved on the stack at some point during execution of the callee.
+ // Therefore they should also be considered as potentially modified by an
+ // attacker/written to.
+ // Also, not all functions may respect the AAPCS ABI rules about
+ // caller/callee-saved registers.
+ if (BC.MIB->isCall(Point))
+ Clobbered.set();
+ else
+ BC.MIB->getClobberedRegs(Point, Clobbered);
+ return Clobbered;
+ }
+
+ // Returns all registers that can be treated as if they are written by an
+ // authentication instruction.
+ SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
+ const State &Cur) const {
+ SmallVector<MCPhysReg> Regs;
+ const MCPhysReg NoReg = BC.MIB->getNoRegister();
+
+ // A signed pointer can be authenticated, or
+ ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
+ if (AutReg && *AutReg != NoReg)
+ Regs.push_back(*AutReg);
+
+ // ... a safe address can be materialized, or
+ MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+ if (NewAddrReg != NoReg)
+ Regs.push_back(NewAddrReg);
+
+ // ... an address can be updated in a safe manner, producing the result
+ // which is as trusted as the input address.
+ MCPhysReg ArithResult, ArithSrc;
+ std::tie(ArithResult, ArithSrc) =
+ BC.MIB->analyzeSafeAddressArithmetics(Point);
+ if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
+ Regs.push_back(ArithResult);
+
+ return Regs;
+ }
+
State computeNext(const MCInst &Point, const State &Cur) {
PacStatePrinter P(BC);
LLVM_DEBUG({
@@ -355,19 +399,20 @@ class PacRetAnalysis
return State();
}
+ // First, compute various properties of the instruction, taking the state
+ // before its execution into account, if necessary.
+
+ BitVector Clobbered = getClobberedRegs(Point);
+ // Compute the set of registers that can be considered as written by
+ // an authentication instruction. This includes operations that are
+ // *strictly better* than authentication, such as materializing a
+ // PC-relative constant.
+ SmallVector<MCPhysReg> AuthenticatedOrBetter =
+ getAuthenticatedRegs(Point, Cur);
+
+ // Then, compute the state after this instruction is executed.
State Next = Cur;
- BitVector Clobbered(NumRegs, false);
- // Assume a call can clobber all registers, including callee-saved
- // registers. There's a good chance that callee-saved registers will be
- // saved on the stack at some point during execution of the callee.
- // Therefore they should also be considered as potentially modified by an
- // attacker/written to.
- // Also, not all functions may respect the AAPCS ABI rules about
- // caller/callee-saved registers.
- if (BC.MIB->isCall(Point))
- Clobbered.set();
- else
- BC.MIB->getClobberedRegs(Point, Clobbered);
+
Next.SafeToDerefRegs.reset(Clobbered);
// Keep track of this instruction if it writes to any of the registers we
// need to track that for:
@@ -375,17 +420,18 @@ class PacRetAnalysis
if (Clobbered[Reg])
lastWritingInsts(Next, Reg) = {&Point};
- ErrorOr<MCPhysReg> AutReg = BC.MIB->getAuthenticatedReg(Point);
- if (AutReg && *AutReg != BC.MIB->getNoRegister()) {
- // The sub-registers of *AutReg are also trusted now, but not its
- // super-registers (as they retain untrusted register units).
- BitVector AuthenticatedSubregs =
- BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true);
- for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
- Next.SafeToDerefRegs.set(Reg);
- if (RegsToTrackInstsFor.isTracked(Reg))
- lastWritingInsts(Next, Reg).clear();
- }
+ // After accounting for clobbered registers in general, override the state
+ // according to authentication and other *special cases* of clobbering.
+
+ // The sub-registers of each authenticated register are also trusted now,
+ // but not their super-registers (as they retain untrusted register units).
+ BitVector AuthenticatedSubregs(NumRegs);
+ for (MCPhysReg AutReg : AuthenticatedOrBetter)
+ AuthenticatedSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true);
+ for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
+ Next.SafeToDerefRegs.set(Reg);
+ if (RegsToTrackInstsFor.isTracked(Reg))
+ lastWritingInsts(Next, Reg).clear();
}
LLVM_DEBUG({
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 2a648baa4d514..56ebc3da2a462 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -304,6 +304,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
+ MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override {
+ switch (Inst.getOpcode()) {
+ case AArch64::ADR:
+ case AArch64::ADRP:
+ return Inst.getOperand(0).getReg();
+ default:
+ return getNoRegister();
+ }
+ }
+
+ std::pair<MCPhysReg, MCPhysReg>
+ analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
+ switch (Inst.getOpcode()) {
+ default:
+ return std::make_pair(getNoRegister(), getNoRegister());
+ case AArch64::ADDXri:
+ case AArch64::SUBXri:
+ return std::make_pair(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg());
+ case AArch64::ORRXrs:
+ // "mov Xd, Xm" is equivalent to "orr Xd, XZR, Xm, lsl #0"
+ if (Inst.getOperand(1).getReg() != AArch64::XZR ||
+ Inst.getOperand(3).getImm() != 0)
+ return std::make_pair(getNoRegister(), getNoRegister());
+
+ return std::make_pair(Inst.getOperand(0).getReg(),
+ Inst.getOperand(2).getReg());
+ }
+ }
+
bool isADRP(const MCInst &Inst) const override {
return Inst.getOpcode() == AArch64::ADRP;
}
diff --git a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
index 01b7cec3272e6..d506ec13f4895 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pacret-autiasp.s
@@ -141,24 +141,9 @@ f_nonx30_ret_ok:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl g
- add x0, x0, #3
ldp x29, x30, [sp], #16
- // FIXME: Should the scanner understand that an authenticated register (below x30,
- // after the autiasp instruction), is OK to be moved to another register
- // and then that register being used to return?
- // This respects that pac-ret hardening intent, but the scanner currently
- // will produce a false positive for this.
- // Is it worthwhile to make the scanner more complex for this case?
- // So far, scanning many millions of instructions across a linux distro,
- // I haven't encountered such an example.
- // The ".if 0" block below tests this case and currently fails.
-.if 0
autiasp
mov x16, x30
-.else
- mov x16, x30
- autia x16, sp
-.endif
// CHECK-NOT: function f_nonx30_ret_ok
ret x16
.size f_nonx30_ret_ok, .-f_nonx30_ret_ok
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
new file mode 100644
index 0000000000000..cdb26aff505ed
--- /dev/null
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mentions the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+// if a register is considered safe when detection of signing oracles is
+// implemented, as it is more traditional usage of PC-relative constants.
+// Moreover, using PAC instructions would improve test robustness, as
+// handling of *calls* can be influenced by what BOLT classifies as a
+// tail call, for example.
+
+ .text
+
+// Define a function that is reachable by ADR instruction.
+ .type sym, at function
+sym:
+ ret
+ .size sym, .-sym
+
+ .globl good_adr
+ .type good_adr, at function
+good_adr:
+// CHECK-NOT: good_adr
+ adr x0, sym
+ br x0
+ .size good_adr, .-good_adr
+
+ .globl good_adrp
+ .type good_adrp, at function
+good_adrp:
+// CHECK-NOT: good_adrp
+ adrp x0, sym
+ br x0
+ .size good_adrp, .-good_adrp
+
+ .globl good_adrp_add
+ .type good_adrp_add, at function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_add, .-good_adrp_add
+
+ .globl good_adrp_add_with_const_offset
+ .type good_adrp_add_with_const_offset, at function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ add x0, x0, #8
+ br x0
+ .size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset
+
+ .globl bad_adrp_with_nonconst_offset
+ .type bad_adrp_with_nonconst_offset, at function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x0, x1
+ br x0
+ .size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+ .globl bad_split_adrp
+ .type bad_split_adrp, at function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+ cbz x2, 1f
+ adrp x0, sym
+1:
+ add x0, x0, :lo12:sym
+ br x0
+ .size bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+ .globl bad_immediate_constant
+ .type bad_immediate_constant, at function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ movz x0, #1234
+ br x0
+ .size bad_immediate_constant, .-bad_immediate_constant
+
+// Any ADR or ADRP instruction followed by any number of increments/decrements
+// by constant is considered safe.
+
+ .globl good_adr_with_add
+ .type good_adr_with_add, at function
+good_adr_with_add:
+// CHECK-NOT: good_adr_with_add
+ adr x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adr_with_add, .-good_adr_with_add
+
+ .globl good_adrp_with_add_non_consecutive
+ .type good_adrp_with_add_non_consecutive, at function
+good_adrp_with_add_non_consecutive:
+// CHECK-NOT: good_adrp_with_add_non_consecutive
+ adrp x0, sym
+ mul x1, x2, x3
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_with_add_non_consecutive, .-good_adrp_with_add_non_consecutive
+
+ .globl good_many_offsets
+ .type good_many_offsets, at function
+good_many_offsets:
+// CHECK-NOT: good_many_offsets
+ adrp x0, sym
+ add x1, x0, #8
+ add x2, x1, :lo12:sym
+ br x2
+ .size good_many_offsets, .-good_many_offsets
+
+// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of
+// support for address arithmetics, but ORR in general is not.
+
+ .globl good_mov_reg
+ .type good_mov_reg, at function
+good_mov_reg:
+// CHECK-NOT: good_mov_reg
+ adrp x0, sym
+ mov x1, x0
+ orr x2, xzr, x1 // the same as "mov x2, x1"
+ br x2
+ .size good_mov_reg, .-good_mov_reg
+
+ .globl bad_orr_not_xzr
+ .type bad_orr_not_xzr, at function
+bad_orr_not_xzr:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_xzr, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x1, #0
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ movz x1, #0
+ orr x2, x1, x0
+ br x2
+ .size bad_orr_not_xzr, .-bad_orr_not_xzr
+
+ .globl bad_orr_not_lsl0
+ .type bad_orr_not_lsl0, at function
+bad_orr_not_lsl0:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_lsl0, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ orr x2, xzr, x0, lsl #1
+ br x2
+ .size bad_orr_not_lsl0, .-bad_orr_not_lsl0
+
+// Check that the input register operands of `add`/`mov` is correct.
+
+ .globl bad_add_input_reg
+ .type bad_add_input_reg, at function
+bad_add_input_reg:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_add_input_reg, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x1, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x1, :lo12:sym
+ br x0
+ .size bad_add_input_reg, .-bad_add_input_reg
+
+ .globl bad_mov_input_reg
+ .type bad_mov_input_reg, at function
+bad_mov_input_reg:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_mov_input_reg, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ mov x0, x1
+ br x0
+ .size bad_mov_input_reg, .-bad_mov_input_reg
+
+ .globl main
+ .type main, at function
+main:
+ mov x0, 0
+ ret
+ .size main, .-main
diff --git a/bolt/test/binary-analysis/AArch64/lit.local.cfg b/bolt/test/binary-analysis/AArch64/lit.local.cfg
index 6ac7d3cc0fec7..54e093566dea9 100644
--- a/bolt/test/binary-analysis/AArch64/lit.local.cfg
+++ b/bolt/test/binary-analysis/AArch64/lit.local.cfg
@@ -1,7 +1,8 @@
if "AArch64" not in config.root.targets:
config.unsupported = True
-flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding"
+# -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
+flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding -Wl,--no-relax"
config.substitutions.insert(0, ("%cflags", f"%cflags {flags}"))
config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}"))
>From 49c04742e4dd365944d81b4f1ffff67af0ba1afa Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 26 Mar 2025 21:06:30 +0300
Subject: [PATCH 2/7] Address review comments
---
.../AArch64/gs-pauth-address-materialization.s | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
index cdb26aff505ed..3a875bdb8c770 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -6,7 +6,7 @@
//
// Note that while "instructions that write to the affected registers"
// section of the report is still technically correct, it does not necessarily
-// mentions the instructions that are used incorrectly.
+// mention the instructions that are used incorrectly.
//
// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
// if a register is considered safe when detection of signing oracles is
@@ -91,7 +91,8 @@ bad_split_adrp:
br x0
.size bad_split_adrp, .-bad_split_adrp
-// Materialization of absolute addresses is not expected.
+// Materialization of absolute addresses is not handled, as it is not expected
+// to be used by real-world code, but can be supported if needed.
.globl bad_immediate_constant
.type bad_immediate_constant, at function
@@ -139,6 +140,15 @@ good_many_offsets:
br x2
.size good_many_offsets, .-good_many_offsets
+ .globl good_negative_offset
+ .type good_negative_offset, at function
+good_negative_offset:
+// CHECK-NOT: good_negative_offset
+ adr x0, sym
+ sub x1, x0, #8
+ br x1
+ .size good_negative_offset, .-good_negative_offset
+
// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of
// support for address arithmetics, but ORR in general is not.
>From 4d8bd913887570c77b49812bad2075c4ced6c4bb Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 1 Apr 2025 15:54:25 +0300
Subject: [PATCH 3/7] Address more review comments
---
bolt/include/bolt/Core/MCPlusBuilder.h | 25 +++++++++----
bolt/lib/Passes/PAuthGadgetScanner.cpp | 35 ++++++++-----------
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 16 ++++++---
.../gs-pauth-address-materialization.s | 3 +-
.../binary-analysis/AArch64/lit.local.cfg | 3 +-
5 files changed, 47 insertions(+), 35 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e59a9145490dd..580ffd365ae77 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,18 +587,29 @@ class MCPlusBuilder {
return getNoRegister();
}
- virtual MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const {
+ /// Returns the register containing an address which is safely materialized
+ /// under Pointer Authentication threat model, or NoRegister otherwise.
+ ///
+ /// The produced address should not be attacker-controlled, assuming an
+ /// attacker is able to modify any writable memory, but not executable code
+ /// (as it should be W^X).
+ virtual MCPhysReg getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return getNoRegister();
}
- /// Analyzes if this instruction can safely perform address arithmetics.
+ /// Analyzes if this instruction can safely perform address arithmetics
+ /// under Pointer Authentication threat model.
+ ///
+ /// If an (OutReg, InReg) pair is returned, then after Inst is executed,
+ /// OutReg is as trusted as InReg is.
///
- /// If the first element of the returned pair is no-register, this instruction
- /// is considered unknown. Otherwise, (output, input) pair is returned,
- /// so that output is as trusted as input is.
- virtual std::pair<MCPhysReg, MCPhysReg>
- analyzeSafeAddressArithmetics(const MCInst &Inst) const {
+ /// The arithmetic instruction is considered safe if OutReg is not attacker-
+ /// controlled, provided InReg and executable code are not. Please note that
+ /// registers other than InReg as well as the contents of memory which is
+ /// writable by the process should be considered attacker-controlled.
+ virtual std::optional<std::pair<MCPhysReg, MCPhysReg>>
+ analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return std::make_pair(getNoRegister(), getNoRegister());
}
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 16da08551a34d..89745e4d6f094 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -353,8 +353,8 @@ class PacRetAnalysis
// Returns all registers that can be treated as if they are written by an
// authentication instruction.
- SmallVector<MCPhysReg> getAuthenticatedRegs(const MCInst &Point,
- const State &Cur) const {
+ SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point,
+ const State &Cur) const {
SmallVector<MCPhysReg> Regs;
const MCPhysReg NoReg = BC.MIB->getNoRegister();
@@ -364,17 +364,16 @@ class PacRetAnalysis
Regs.push_back(*AutReg);
// ... a safe address can be materialized, or
- MCPhysReg NewAddrReg = BC.MIB->getSafelyMaterializedAddressReg(Point);
+ MCPhysReg NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point);
if (NewAddrReg != NoReg)
Regs.push_back(NewAddrReg);
// ... an address can be updated in a safe manner, producing the result
// which is as trusted as the input address.
- MCPhysReg ArithResult, ArithSrc;
- std::tie(ArithResult, ArithSrc) =
- BC.MIB->analyzeSafeAddressArithmetics(Point);
- if (ArithResult != NoReg && Cur.SafeToDerefRegs[ArithSrc])
- Regs.push_back(ArithResult);
+ if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) {
+ if (Cur.SafeToDerefRegs[DstAndSrc->second])
+ Regs.push_back(DstAndSrc->first);
+ }
return Regs;
}
@@ -403,12 +402,8 @@ class PacRetAnalysis
// before its execution into account, if necessary.
BitVector Clobbered = getClobberedRegs(Point);
- // Compute the set of registers that can be considered as written by
- // an authentication instruction. This includes operations that are
- // *strictly better* than authentication, such as materializing a
- // PC-relative constant.
- SmallVector<MCPhysReg> AuthenticatedOrBetter =
- getAuthenticatedRegs(Point, Cur);
+ SmallVector<MCPhysReg> NewSafeToDerefRegs =
+ getRegsMadeSafeToDeref(Point, Cur);
// Then, compute the state after this instruction is executed.
State Next = Cur;
@@ -423,12 +418,12 @@ class PacRetAnalysis
// After accounting for clobbered registers in general, override the state
// according to authentication and other *special cases* of clobbering.
- // The sub-registers of each authenticated register are also trusted now,
- // but not their super-registers (as they retain untrusted register units).
- BitVector AuthenticatedSubregs(NumRegs);
- for (MCPhysReg AutReg : AuthenticatedOrBetter)
- AuthenticatedSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true);
- for (MCPhysReg Reg : AuthenticatedSubregs.set_bits()) {
+ // The sub-registers are also safe-to-dereference now, but not their
+ // super-registers (as they retain untrusted register units).
+ BitVector NewSafeSubregs(NumRegs);
+ for (MCPhysReg AutReg : NewSafeToDerefRegs)
+ NewSafeSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true);
+ for (MCPhysReg Reg : NewSafeSubregs.set_bits()) {
Next.SafeToDerefRegs.set(Reg);
if (RegsToTrackInstsFor.isTracked(Reg))
lastWritingInsts(Next, Reg).clear();
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 56ebc3da2a462..7e7f0c8509e69 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -304,30 +304,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
- MCPhysReg getSafelyMaterializedAddressReg(const MCInst &Inst) const override {
+ MCPhysReg getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::ADR:
case AArch64::ADRP:
+ // These instructions produce an address value based on the information
+ // encoded into the instruction itself (which should reside in a read-only
+ // code memory) and the value of PC register (that is, the location of
+ // this instruction), so the produced value is not attacker-controlled.
return Inst.getOperand(0).getReg();
default:
return getNoRegister();
}
}
- std::pair<MCPhysReg, MCPhysReg>
- analyzeSafeAddressArithmetics(const MCInst &Inst) const override {
+ std::optional<std::pair<MCPhysReg, MCPhysReg>>
+ analyzeAddressArithmeticsForPtrAuth(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
default:
- return std::make_pair(getNoRegister(), getNoRegister());
+ return std::nullopt;
case AArch64::ADDXri:
case AArch64::SUBXri:
+ // The immediate addend is encoded into the instruction itself, so it is
+ // not attacker-controlled under Pointer Authentication threat model.
return std::make_pair(Inst.getOperand(0).getReg(),
Inst.getOperand(1).getReg());
case AArch64::ORRXrs:
// "mov Xd, Xm" is equivalent to "orr Xd, XZR, Xm, lsl #0"
if (Inst.getOperand(1).getReg() != AArch64::XZR ||
Inst.getOperand(3).getImm() != 0)
- return std::make_pair(getNoRegister(), getNoRegister());
+ return std::nullopt;
return std::make_pair(Inst.getOperand(0).getReg(),
Inst.getOperand(2).getReg());
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
index 3a875bdb8c770..08071c238ef13 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -1,4 +1,5 @@
-// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
+// RUN: %clang %cflags -march=armv8.3-a -Wl,--no-relax %s -o %t.exe
// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
// Test various patterns that should or should not be considered safe
diff --git a/bolt/test/binary-analysis/AArch64/lit.local.cfg b/bolt/test/binary-analysis/AArch64/lit.local.cfg
index 54e093566dea9..6ac7d3cc0fec7 100644
--- a/bolt/test/binary-analysis/AArch64/lit.local.cfg
+++ b/bolt/test/binary-analysis/AArch64/lit.local.cfg
@@ -1,8 +1,7 @@
if "AArch64" not in config.root.targets:
config.unsupported = True
-# -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
-flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding -Wl,--no-relax"
+flags = "--target=aarch64-linux-gnu -nostartfiles -nostdlib -ffreestanding"
config.substitutions.insert(0, ("%cflags", f"%cflags {flags}"))
config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}"))
>From c04ec57a4e38992a8d085e5e934774b989b9be40 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Tue, 1 Apr 2025 17:13:34 +0300
Subject: [PATCH 4/7] Fix formatting
---
bolt/include/bolt/Core/MCPlusBuilder.h | 3 ++-
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 580ffd365ae77..9a4709db20753 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -593,7 +593,8 @@ class MCPlusBuilder {
/// The produced address should not be attacker-controlled, assuming an
/// attacker is able to modify any writable memory, but not executable code
/// (as it should be W^X).
- virtual MCPhysReg getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
+ virtual MCPhysReg
+ getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
llvm_unreachable("not implemented");
return getNoRegister();
}
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7e7f0c8509e69..f8ae7ce0aa129 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -304,7 +304,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
}
- MCPhysReg getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
+ MCPhysReg
+ getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::ADR:
case AArch64::ADRP:
>From 2bf9f8929ea09d0e422915055ede2589cad93e43 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Wed, 2 Apr 2025 20:16:57 +0300
Subject: [PATCH 5/7] [nit] Rename loop var: AutReg -> SafeReg
---
bolt/lib/Passes/PAuthGadgetScanner.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp
index 89745e4d6f094..00846247fdc21 100644
--- a/bolt/lib/Passes/PAuthGadgetScanner.cpp
+++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp
@@ -421,8 +421,8 @@ class PacRetAnalysis
// The sub-registers are also safe-to-dereference now, but not their
// super-registers (as they retain untrusted register units).
BitVector NewSafeSubregs(NumRegs);
- for (MCPhysReg AutReg : NewSafeToDerefRegs)
- NewSafeSubregs |= BC.MIB->getAliases(AutReg, /*OnlySmaller=*/true);
+ for (MCPhysReg SafeReg : NewSafeToDerefRegs)
+ NewSafeSubregs |= BC.MIB->getAliases(SafeReg, /*OnlySmaller=*/true);
for (MCPhysReg Reg : NewSafeSubregs.set_bits()) {
Next.SafeToDerefRegs.set(Reg);
if (RegsToTrackInstsFor.isTracked(Reg))
>From c0075d77d909a324c85e7135638625dc7848554f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Thu, 3 Apr 2025 17:52:12 +0300
Subject: [PATCH 6/7] Update the description of
getMaterializedAddressRegForPtrAuth
---
bolt/include/bolt/Core/MCPlusBuilder.h | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 9a4709db20753..6d2e51cb4bd92 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -587,12 +587,19 @@ class MCPlusBuilder {
return getNoRegister();
}
- /// Returns the register containing an address which is safely materialized
- /// under Pointer Authentication threat model, or NoRegister otherwise.
+ /// Returns the register containing an address safely materialized by `Inst`
+ /// under the Pointer Authentication threat model.
///
- /// The produced address should not be attacker-controlled, assuming an
- /// attacker is able to modify any writable memory, but not executable code
- /// (as it should be W^X).
+ /// Returns the register `Inst` writes to if:
+ /// 1. the register is a materialized address, and
+ /// 2. the register has been materialized safely, i.e. cannot be attacker-
+ /// controlled, under the Pointer Authentication threat model.
+ ///
+ /// If the instruction does not write to any register satisfying the above
+ /// two conditions, NoRegister is returned.
+ ///
+ /// The Pointer Authentication threat model assumes an attacker is able to
+ /// modify any writable memory, but not executable code (due to W^X).
virtual MCPhysReg
getMaterializedAddressRegForPtrAuth(const MCInst &Inst) const {
llvm_unreachable("not implemented");
>From f89aa63448e3c096069427df0f8d86da506ee4a1 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko <atrosinenko at accesssoftek.com>
Date: Fri, 4 Apr 2025 19:26:49 +0300
Subject: [PATCH 7/7] Disambiguate the intention of the test cases
---
.../AArch64/gs-pauth-address-materialization.s | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
index 08071c238ef13..b4dd53a5e3c8d 100644
--- a/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
+++ b/bolt/test/binary-analysis/AArch64/gs-pauth-address-materialization.s
@@ -152,6 +152,7 @@ good_negative_offset:
// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of
// support for address arithmetics, but ORR in general is not.
+// This restriction may be relaxed in the future.
.globl good_mov_reg
.type good_mov_reg, at function
@@ -176,6 +177,8 @@ bad_orr_not_xzr:
// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0
// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
adrp x0, sym
+ // The generic case of "orr Xd, Xn, Xm" is not allowed so far,
+ // even if Xn is known to be safe
movz x1, #0
orr x2, x1, x0
br x2
@@ -193,6 +196,8 @@ bad_orr_not_lsl0:
// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, xzr, x0, lsl #1
// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
adrp x0, sym
+ // Currently, the only allowed form of "orr" is that used by "mov Xd, Xn" alias.
+ // This can be relaxed in the future.
orr x2, xzr, x0, lsl #1
br x2
.size bad_orr_not_lsl0, .-bad_orr_not_lsl0
More information about the llvm-commits
mailing list