[llvm] [BOLT][binary-analysis] Add initial pac-ret gadget scanner (PR #122304)
Jacob Bramley via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 03:17:53 PST 2025
================
@@ -0,0 +1,918 @@
+// RUN: %clang %cflags -march=armv9.5-a+pauth-lr -mbranch-protection=pac-ret %s %p/../../Inputs/asm_main.c -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck %s
+
+ .text
+
+ .globl f1
+ .type f1,@function
+f1:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ // autiasp
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f1, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g@PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f1, .-f1
+
+
+ .globl f_intermediate_overwrite1
+ .type f_intermediate_overwrite1,@function
+f_intermediate_overwrite1:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ autiasp
+ ldp x29, x30, [sp], #16
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite1, basic block .LBB
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g@PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f_intermediate_overwrite1, .-f_intermediate_overwrite1
+
+ .globl f_intermediate_overwrite2
+ .type f_intermediate_overwrite2,@function
+f_intermediate_overwrite2:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ autiasp
+ mov x30, x0
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite2, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x30, x0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g@PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x30, x0
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f_intermediate_overwrite2, .-f_intermediate_overwrite2
+
+ .globl f_intermediate_read
+ .type f_intermediate_read,@function
+f_intermediate_read:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ autiasp
+ mov x0, x30
+// CHECK-NOT: function f_intermediate_read
+ ret
+ .size f_intermediate_read, .-f_intermediate_read
+
+ .globl f_intermediate_overwrite3
+ .type f_intermediate_overwrite3,@function
+f_intermediate_overwrite3:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ autiasp
+ mov w30, w0
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite3, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov w30, w0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g@PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: mov w30, w0
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f_intermediate_overwrite3, .-f_intermediate_overwrite3
+
+ .globl f_nonx30_ret
+ .type f_nonx30_ret,@function
+f_nonx30_ret:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ mov x16, x30
+ autiasp
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_nonx30_ret, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret x16
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x16, x30
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g@PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x16, x30
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: ret x16
+ ret x16
+ .size f_nonx30_ret, .-f_nonx30_ret
+
+ .globl f_nonx30_ret_ok
+ .type f_nonx30_ret_ok,@function
+f_nonx30_ret_ok:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ // FIXME: Should the scanner understand that it is OK for an authenticated register
+ // (x30 below, after the autiasp instruction) to be moved to another register,
+ // and for that other register then to be used to return?
+ // This respects the pac-ret hardening intent, but the scanner currently
+ // will produce a false positive for this.
+ // Is it worthwhile to make the scanner more complex for this case?
+ // So far, scanning many millions of instructions across a linux distro,
+ // I haven't encountered such an example.
+ // The ".if 0" block below tests this case and currently fails.
+.if 0
+ autiasp
+ mov x16, x30
+.else
+ mov x16, x30
+ autia x16, sp
+.endif
+// CHECK-NOT: function f_nonx30_ret_ok
+ ret x16
+ .size f_nonx30_ret_ok, .-f_nonx30_ret_ok
+
----------------
jacobbramley wrote:
There is _something_ special about `lr`, which is that it is written by `bl` or `blr`, and we usually expect that to be the return address at a function's entry point. With `pac-ret`, functions have to trust their provided `lr`:
```
fn:
# If `lr` is bad on entry, it still signs and authenticates correctly.
paciasp
...
autiasp
ret
```
Note that without `+leaf`, functions that never spill `lr` to the stack (typically leaf functions) don't sign or authenticate `lr` at all. The threat we're guarding against here is that the return address is overwritten between the function prologue and epilogue, not that the return address was invalid in the first place.
For the example highlighted by @atrosinenko, where `x1` is passed, then returned to, but never stored to memory, I think I'd prefer if it did raise an error.
- `f_nonx30_ret_non_auted` is a function that cannot be conventionally called with `bl` or `blr`, so is highly likely to be doing some weird control flow that justifies attention.
- It seems more likely that control-flow gadgets will be available to feed a function an attacker-controlled `x1` than an attacker-controlled `lr`.
- It's not AAPCS64, and arguably not `pac-ret`.
That said, it's correct that the scanner doesn't raise an error here:
```
fn:
paciasp
...
autia x0, sp
ret x0
```
Although unusual, the fact that it returns to `x0` doesn't change anything in terms of trust. It still trusts `lr` on function entry.
To summarise, I think this scanner should accept `ret <reg>` where any of the following are true:
- `<reg>` is `lr` and has not been modified during the function.
- `<reg>` is authenticated.
This leaves false negatives like this:
```
fn:
pacia x0, sp
...
autia lr, sp
ret
```
... but identifying that in general could be very difficult.
----
For forward-edge PAuth (like PAuth ABI), the threat model is slightly different: an attack could take place any time between pointer generation (e.g. by the runtime loader) and the point of use. For that reason, we'd have to authenticate every pointer, and that'd require a second `RegsToTrackInstsFor`, but that's currently out of scope for this scanner.
https://github.com/llvm/llvm-project/pull/122304
More information about the llvm-commits
mailing list