[llvm] [BOLT][binary-analysis] Add initial pac-ret gadget scanner (PR #122304)

Kristof Beyls via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 12:57:51 PST 2025


================
@@ -0,0 +1,918 @@
+// RUN: %clang %cflags -march=armv9.5-a+pauth-lr -mbranch-protection=pac-ret %s %p/../../Inputs/asm_main.c -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck %s
+
+        .text
+
+        .globl  f1
+        .type   f1, at function
+f1:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        ldp     x29, x30, [sp], #16
+        // autiasp
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f1, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT:    The return instruction is     {{[0-9a-f]+}}:       ret
+// CHECK-NEXT:    The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT:    1. {{[0-9a-f]+}}: ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}:   paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}:   bl      g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}:   add     x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}:   ret
+        ret
+        .size f1, .-f1
+
+
+        .globl  f_intermediate_overwrite1
+        .type   f_intermediate_overwrite1, at function
+f_intermediate_overwrite1:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        autiasp
+        ldp     x29, x30, [sp], #16
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite1, basic block .LBB
+// CHECK-NEXT:    The return instruction is     {{[0-9a-f]+}}:       ret
+// CHECK-NEXT:    The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT:    1. {{[0-9a-f]+}}: ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}:   paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}:   bl      g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}:   add     x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}:   autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}:   ret
+        ret
+        .size f_intermediate_overwrite1, .-f_intermediate_overwrite1
+
+        .globl  f_intermediate_overwrite2
+        .type   f_intermediate_overwrite2, at function
+f_intermediate_overwrite2:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        ldp     x29, x30, [sp], #16
+        autiasp
+        mov     x30, x0
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite2, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT:    The return instruction is     {{[0-9a-f]+}}:       ret
+// CHECK-NEXT:    The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT:    1. {{[0-9a-f]+}}: mov     x30, x0
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}:   paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}:   bl      g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}:   add     x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}:   autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x30, x0
+// CHECK-NEXT: {{[0-9a-f]+}}:   ret
+        ret
+        .size f_intermediate_overwrite2, .-f_intermediate_overwrite2
+
+        .globl  f_intermediate_read
+        .type   f_intermediate_read, at function
+f_intermediate_read:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        ldp     x29, x30, [sp], #16
+        autiasp
+        mov     x0, x30
+// CHECK-NOT: function f_intermediate_read
+        ret
+        .size f_intermediate_read, .-f_intermediate_read
+
+        .globl  f_intermediate_overwrite3
+        .type   f_intermediate_overwrite3, at function
+f_intermediate_overwrite3:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        ldp     x29, x30, [sp], #16
+        autiasp
+        mov     w30, w0
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_intermediate_overwrite3, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT:    The return instruction is     {{[0-9a-f]+}}:       ret
+// CHECK-NEXT:    The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT:    1. {{[0-9a-f]+}}: mov     w30, w0
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}:   paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}:   bl      g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}:   add     x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}:   autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     w30, w0
+// CHECK-NEXT: {{[0-9a-f]+}}:   ret
+        ret
+        .size f_intermediate_overwrite3, .-f_intermediate_overwrite3
+
+        .globl  f_nonx30_ret
+        .type   f_nonx30_ret, at function
+f_nonx30_ret:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        ldp     x29, x30, [sp], #16
+        mov     x16, x30
+        autiasp
+// CHECK-LABEL: GS-PACRET: non-protected ret found in function f_nonx30_ret, basic block .LBB{{[0-9]+}}, at address
+// CHECK-NEXT:    The return instruction is     {{[0-9a-f]+}}:       ret     x16
+// CHECK-NEXT:    The 1 instructions that write to the return register after any authentication are:
+// CHECK-NEXT:    1. {{[0-9a-f]+}}: mov     x16, x30
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}:   paciasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   stp     x29, x30, [sp, #-0x10]!
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x29, sp
+// CHECK-NEXT: {{[0-9a-f]+}}:   bl      g at PLT
+// CHECK-NEXT: {{[0-9a-f]+}}:   add     x0, x0, #0x3
+// CHECK-NEXT: {{[0-9a-f]+}}:   ldp     x29, x30, [sp], #0x10
+// CHECK-NEXT: {{[0-9a-f]+}}:   mov     x16, x30
+// CHECK-NEXT: {{[0-9a-f]+}}:   autiasp
+// CHECK-NEXT: {{[0-9a-f]+}}:   ret     x16
+        ret     x16
+        .size f_nonx30_ret, .-f_nonx30_ret
+
+        .globl  f_nonx30_ret_ok
+        .type   f_nonx30_ret_ok, at function
+f_nonx30_ret_ok:
+        paciasp
+        stp     x29, x30, [sp, #-16]!
+        mov     x29, sp
+        bl      g
+        add     x0, x0, #3
+        ldp     x29, x30, [sp], #16
+        // FIXME: Should the scanner understand that an authenticated register (below x30,
+        //        after the autiasp instruction), is OK to be moved to another register
+        //        and then that register being used to return?
+        //        This respects that pac-ret hardening intent, but the scanner currently
+        //        will produce a false positive for this.
+        //        Is it worthwhile to make the scanner more complex for this case?
+        //        So far, scanning many millions of instructions across a linux distro,
+        //        I haven't encountered such an example.
+        //        The ".if 0" block below tests this case and currently fails.
+.if 0
+        autiasp
+        mov     x16, x30
+.else
+        mov     x16, x30
+        autia   x16, sp
+.endif
+// CHECK-NOT: function f_nonx30_ret_ok
+        ret     x16
+        .size f_nonx30_ret_ok, .-f_nonx30_ret_ok
+
----------------
kbeyls wrote:

I haven't really thought this through in detail, but my current feel is that there really isn't anything special about `LR` (or `x30`) versus other registers at function entry.

Sure, in many cases, the value in `LR` will have been produced by executing the `BL` instruction that jumps to this function. But, for example, when the function is called through a tail call, the value of the LR register might be a "non-authenticated value that comes from memory", for example:

```
  str x30, [sp]
  ldr x30, [sp]
  b   f
  ...

f:
  ret x30
```

This form isn't really different from the following form (which probably wouldn't show up in real code, unless in a code generator following very different ABI rules...)
```
  adr x1, continuation_address
  b   f
continuation_address:
  nop
  ...

f:
  ret x1
```

I'm now starting to wonder if the first example above is an example of a false negative with the current implementation of the pac-ret scanner in the PR...

Thinking about how this false negative could be fixed, my immediate ideas are currently limited to:
* Assume on every call (tail call or otherwise) that the value in register `x30` will likely be used to return, and therefore must not be a non-authenticated value coming from memory. This could lead to false positives when the function that is called never uses `x30` to return...
* Alternatively, do 2 separate analyses:
   * first do an analysis for each function, recording which of the register values at function start may get used as a code (or also data?) pointer without first being authenticated.
   * In a second analysis, extend the current pac-ret analysis to also check at calls to a particular function, that there is no intersection between the set of `NonAutClobRegs` and the set of registers computed for that function in the first analysis.
  Of course, this would require knowing at every call, also indirect calls, which the set of possible functions called would be, which seems unlikely to be true in general.


https://github.com/llvm/llvm-project/pull/122304


More information about the llvm-commits mailing list