[llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)

Anatoly Trosinenko via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 4 08:51:44 PDT 2025


================
@@ -0,0 +1,228 @@
+// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while the "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mention the instructions that are used incorrectly.
+//
+// FIXME: Once detection of signing oracles is implemented, switch from
+//        indirect tail calls to PAC* instructions for testing whether a
+//        register is considered safe, as that is a more traditional usage of
+//        PC-relative constants. Moreover, using PAC instructions would improve
+//        test robustness, as handling of *calls* can be influenced by what
+//        BOLT classifies as a tail call, for example.
+
+        .text
+
+// Define a function that is reachable by ADR instruction.
+        .type   sym,@function
+sym:
+        ret
+        .size   sym, .-sym
+
+        .globl  good_adr
+        .type   good_adr,@function
+good_adr:
+// CHECK-NOT: good_adr
+        adr     x0, sym
+        br      x0
+        .size   good_adr, .-good_adr
+
+        .globl  good_adrp
+        .type   good_adrp,@function
+good_adrp:
+// CHECK-NOT: good_adrp
+        adrp    x0, sym
+        br      x0
+        .size   good_adrp, .-good_adrp
+
+        .globl  good_adrp_add
+        .type   good_adrp_add,@function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+        adrp    x0, sym
+        add     x0, x0, :lo12:sym
+        br      x0
+        .size   good_adrp_add, .-good_adrp_add
+
+        .globl  good_adrp_add_with_const_offset
+        .type   good_adrp_add_with_const_offset,@function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+        adrp    x0, sym
+        add     x0, x0, :lo12:sym
+        add     x0, x0, #8
+        br      x0
+        .size   good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset
+
+        .globl  bad_adrp_with_nonconst_offset
+        .type   bad_adrp_with_nonconst_offset,@function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      add     x0, x0, x1
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   adrp    x0, #{{.*}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   add     x0, x0, x1
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br      x0 # TAILCALL
+        adrp    x0, sym
+        add     x0, x0, x1
+        br      x0
+        .size   bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+        .globl  bad_split_adrp
+        .type   bad_split_adrp,@function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # UNKNOWN CONTROL FLOW
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      add     x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   add     x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br      x0 # UNKNOWN CONTROL FLOW
+        cbz     x2, 1f
+        adrp    x0, sym
+1:
+        add     x0, x0, :lo12:sym
+        br      x0
+        .size   bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not expected.
+
+        .globl  bad_immediate_constant
+        .type   bad_immediate_constant,@function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT:  The instruction is     {{[0-9a-f]+}}:      br      x0 # TAILCALL
+// CHECK-NEXT:  The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT:  1.     {{[0-9a-f]+}}:      mov     x0, #{{.*}}
+// CHECK-NEXT:  This happens in the following basic block:
+// CHECK-NEXT:  {{[0-9a-f]+}}:   mov     x0, #{{.*}}
+// CHECK-NEXT:  {{[0-9a-f]+}}:   br      x0 # TAILCALL
+        movz    x0, #1234
+        br      x0
----------------
atrosinenko wrote:

I don't have a strong opinion for or against trying to allow every instruction that is expected to be safe, but it should probably be harmless to treat `movz` as yet another address-materializing instruction (in addition to `adr` and `adrp`) and `movk` as yet another case of address arithmetic.

On the other hand, when testing a prototype, I saw a few resign-with-offset instruction sequences that looked completely harmless, but they were reported as signing or authentication oracles. The reason was that the offset did not fit into the immediate operand of `add Xd, Xn, imm`, so it was first placed in a register and then `add Xd, Xm, Xn` was emitted. There were very few such reports, but if we would like to eliminate such false positives, it may be useful to handle "address constants" and "small constant numbers" separately.

https://github.com/llvm/llvm-project/pull/132540


More information about the llvm-commits mailing list