[llvm] [BOLT][binary-analysis] Add initial pac-ret gadget scanner (PR #122304)
Kristof Beyls via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 12:57:51 PST 2025
================
@@ -0,0 +1,918 @@
+// RUN: %clang %cflags -march=armv9.5-a+pauth-lr -mbranch-protection=pac-ret %s %p/../../Inputs/asm_main.c -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck %s
+
+ .text
+
+ .globl f1
+ .type f1, at function
+f1:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ // autiasp
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f1, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f1, .-f1
+
+
+ .globl f_intermediate_overwrite1
+ .type f_intermediate_overwrite1, at function
+f_intermediate_overwrite1:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ autiasp
+ ldp x29, x30, [sp], #16
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite1, basic block .LBB
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f_intermediate_overwrite1, .-f_intermediate_overwrite1
+
+ .globl f_intermediate_overwrite2
+ .type f_intermediate_overwrite2, at function
+f_intermediate_overwrite2:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ autiasp
+ mov x30, x0
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite2, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x30, x0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x30, x0
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f_intermediate_overwrite2, .-f_intermediate_overwrite2
+
+ .globl f_intermediate_read
+ .type f_intermediate_read, at function
+f_intermediate_read:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ autiasp
+ mov x0, x30
+// CHECK-NOT: function f_intermediate_read
+ ret
+ .size f_intermediate_read, .-f_intermediate_read
+
+ .globl f_intermediate_overwrite3
+ .type f_intermediate_overwrite3, at function
+f_intermediate_overwrite3:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ autiasp
+ mov w30, w0
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite3, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov w30, w0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: mov w30, w0
+// CHECK-NEXT: {{[0-9a-f]+}}: ret
+ ret
+ .size f_intermediate_overwrite3, .-f_intermediate_overwrite3
+
+ .globl f_nonx30_ret
+ .type f_nonx30_ret, at function
+f_nonx30_ret:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ mov x16, x30
+ autiasp
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_nonx30_ret, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT: The return instruction is {{[0-9a-f]+}}: ret x16
+// CHECK-NEXT: The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x16, x30
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}: bl g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x16, x30
+// CHECK-NEXT: {{[0-9a-f]+}}: autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}: ret x16
+ ret x16
+ .size f_nonx30_ret, .-f_nonx30_ret
+
+ .globl f_nonx30_ret_ok
+ .type f_nonx30_ret_ok, at function
+f_nonx30_ret_ok:
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl g
+ add x0, x0, #3
+ ldp x29, x30, [sp], #16
+ // FIXME: Should the scanner understand that an authenticated register (below x30,
+ // after the autiasp instruction), is OK to be moved to another register
+ // and then that register being used to return?
+ // This respects that pac-ret hardening intent, but the scanner currently
+ // will produce a false positive for this.
+ // Is it worthwhile to make the scanner more complex for this case?
+ // So far, scanning many millions of instructions across a linux distro,
+ // I haven't encountered such an example.
+ // The ".if 0" block below tests this case and currently fails.
+.if 0
+ autiasp
+ mov x16, x30
+.else
+ mov x16, x30
+ autia x16, sp
+.endif
+// CHECK-NOT: function f_nonx30_ret_ok
+ ret x16
+ .size f_nonx30_ret_ok, .-f_nonx30_ret_ok
+
----------------
kbeyls wrote:
I haven't really thought this through in detail, but my current feel is that there really isn't anything special about `LR` (or `x30`) versus other registers at function entry.
Sure, in many cases, the value in `LR` will have been produced by executing the `BL` instruction that jumps to this function. But, for example, when the function is called through a tail call, the value of the LR register might be a "non-authenticated value that comes from memory", for example:
```
str x30, [sp]
ldr x30, [sp]
b f
...
f:
ret x30
```
This form isn't really different from the following form (which probably wouldn't show up in real code, unless in a code generator following very different ABI rules...)
```
adr x1, continuation_address
b f
continuation_address:
nop
...
f:
ret x1
```
I'm now starting to wonder if the first example above is an example of a false negative with the current implementation of the pac-ret scanner in the PR...
Thinking about how this false negative could be fixed, my immediate ideas are currently limited to:
* Assume on every call (tail call or otherwise) that the value in register `x30` will likely be used to return, and therefore must not be a non-authenticated value coming from memory. This could lead to false positives when the function that is called never uses `x30` to return...
* Alternatively, do 2 separate analyses:
* first do an analysis for each function, recording which of the register values at function start may get used as a code (or also data?) pointer without first being authenticated.
* In a second analysis, extend the current pac-ret analysis to also check at calls to a particular function, that there is no intersection between the set of `NonAutClobRegs` and the set of registers computed for that function in the first analysis.
Of course, this would require knowing at every call, also indirect calls, which the set of possible functions called would be, which seems unlikely to be true in general.
https://github.com/llvm/llvm-project/pull/122304
More information about the llvm-commits
mailing list