[llvm] [BOLT] Gadget scanner: Detect address materialization and arithmetics (PR #132540)
Anatoly Trosinenko via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 4 08:51:44 PDT 2025
================
@@ -0,0 +1,239 @@
+// -Wl,--no-relax prevents converting ADRP+ADD pairs into NOP+ADR.
+// RUN: %clang %cflags -march=armv8.3-a -Wl,--no-relax %s -o %t.exe
+// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s
+
+// Test various patterns that should or should not be considered safe
+// materialization of PC-relative addresses.
+//
+// Note that while "instructions that write to the affected registers"
+// section of the report is still technically correct, it does not necessarily
+// mention the instructions that are used incorrectly.
+//
+// FIXME: Switch to PAC* instructions instead of indirect tail call for testing
+// if a register is considered safe when detection of signing oracles is
+// implemented, as it is more traditional usage of PC-relative constants.
+// Moreover, using PAC instructions would improve test robustness, as
+// handling of *calls* can be influenced by what BOLT classifies as a
+// tail call, for example.
+
+ .text
+
+// Define a function that is reachable by ADR instruction.
+// It is the target symbol of the ADR/ADRP instructions in the test cases below.
+ .type sym, @function
+sym:
+ ret
+ .size sym, .-sym
+
+// A single ADR materializes the address of sym; no report is expected.
+ .globl good_adr
+ .type good_adr, @function
+good_adr:
+// CHECK-NOT: good_adr
+ adr x0, sym
+ br x0
+ .size good_adr, .-good_adr
+
+// ADRP on its own, with no :lo12: part added afterwards, is also expected
+// to produce no report.
+ .globl good_adrp
+ .type good_adrp, @function
+good_adrp:
+// CHECK-NOT: good_adrp
+ adrp x0, sym
+ br x0
+ .size good_adrp, .-good_adrp
+
+// The ADRP + ADD :lo12: pair targeting sym; no report is expected.
+ .globl good_adrp_add
+ .type good_adrp_add, @function
+good_adrp_add:
+// CHECK-NOT: good_adrp_add
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_add, .-good_adrp_add
+
+// A further increment by an immediate (#8) after ADRP + ADD is expected to
+// produce no report.
+ .globl good_adrp_add_with_const_offset
+ .type good_adrp_add_with_const_offset, @function
+good_adrp_add_with_const_offset:
+// CHECK-NOT: good_adrp_add_with_const_offset
+ adrp x0, sym
+ add x0, x0, :lo12:sym
+ add x0, x0, #8
+ br x0
+ .size good_adrp_add_with_const_offset, .-good_adrp_add_with_const_offset
+
+// The offset added to the ADRP result comes from a register (x1, which this
+// function never sets) rather than an immediate. A report is expected, naming
+// the register-register ADD as the offending write to x0.
+ .globl bad_adrp_with_nonconst_offset
+ .type bad_adrp_with_nonconst_offset, @function
+bad_adrp_with_nonconst_offset:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_adrp_with_nonconst_offset, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, x1
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ adrp x0, sym
+ add x0, x0, x1
+ br x0
+ .size bad_adrp_with_nonconst_offset, .-bad_adrp_with_nonconst_offset
+
+// CBZ may branch over the ADRP, so the ADD at label 1 can be reached with an
+// x0 that was not written by ADRP. A report is expected; the reported basic
+// block contains only the ADD and the BR.
+ .globl bad_split_adrp
+ .type bad_split_adrp, @function
+bad_split_adrp:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_split_adrp, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: add x0, x0, #0x{{[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # UNKNOWN CONTROL FLOW
+ cbz x2, 1f
+ adrp x0, sym
+1:
+ add x0, x0, :lo12:sym
+ br x0
+ .size bad_split_adrp, .-bad_split_adrp
+
+// Materialization of absolute addresses is not handled, as it is not expected
+// to be used by real-world code, but can be supported if needed.
+
+// x0 is loaded with an immediate constant by MOVZ instead of a PC-relative
+// ADR/ADRP. A report is expected; the expected output shows the instruction
+// under its MOV alias.
+ .globl bad_immediate_constant
+ .type bad_immediate_constant, @function
+bad_immediate_constant:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_immediate_constant, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x0 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x0, #{{.*}}
+// CHECK-NEXT: {{[0-9a-f]+}}: br x0 # TAILCALL
+ movz x0, #1234
+ br x0
+ .size bad_immediate_constant, .-bad_immediate_constant
+
+// Any ADR or ADRP instruction followed by any number of increments/decrements
+// by constant is considered safe.
+
+// ADR (rather than ADRP) followed by ADD :lo12: is still ADR plus an
+// increment by a constant, so no report is expected.
+ .globl good_adr_with_add
+ .type good_adr_with_add, @function
+good_adr_with_add:
+// CHECK-NOT: good_adr_with_add
+ adr x0, sym
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adr_with_add, .-good_adr_with_add
+
+// An unrelated instruction (MUL, which writes only x1) sits between ADRP and
+// ADD; no report is expected.
+ .globl good_adrp_with_add_non_consecutive
+ .type good_adrp_with_add_non_consecutive, @function
+good_adrp_with_add_non_consecutive:
+// CHECK-NOT: good_adrp_with_add_non_consecutive
+ adrp x0, sym
+ mul x1, x2, x3
+ add x0, x0, :lo12:sym
+ br x0
+ .size good_adrp_with_add_non_consecutive, .-good_adrp_with_add_non_consecutive
+
+// Each constant increment writes its result to a different register
+// (x0 -> x1 -> x2), and the branch uses the last one; no report is expected.
+ .globl good_many_offsets
+ .type good_many_offsets, @function
+good_many_offsets:
+// CHECK-NOT: good_many_offsets
+ adrp x0, sym
+ add x1, x0, #8
+ add x2, x1, :lo12:sym
+ br x2
+ .size good_many_offsets, .-good_many_offsets
+
+// A decrement by a constant (SUB with an immediate), written to another
+// register, is expected to produce no report.
+ .globl good_negative_offset
+ .type good_negative_offset, @function
+good_negative_offset:
+// CHECK-NOT: good_negative_offset
+ adr x0, sym
+ sub x1, x0, #8
+ br x1
+ .size good_negative_offset, .-good_negative_offset
+
+// MOV Xd, Xm (which is an alias of ORR Xd, XZR, Xm) is handled as part of
+// support for address arithmetics, but ORR in general is not.
+
+// The ADRP result is copied twice, once using the MOV alias and once using
+// the explicit ORR-with-XZR form; no report is expected.
+ .globl good_mov_reg
+ .type good_mov_reg, @function
+good_mov_reg:
+// CHECK-NOT: good_mov_reg
+ adrp x0, sym
+ mov x1, x0
+ orr x2, xzr, x1 // the same as "mov x2, x1"
+ br x2
+ .size good_mov_reg, .-good_mov_reg
+
+// x1 is intentionally set to zero, so that "orr x2, x1, x0" computes the same
+// value as "mov x2, x0". It must nevertheless be reported: ORR Xd, Xn, Xm is
+// only accepted when Xn is XZR itself, not any other register, even one that
+// holds an otherwise harmless value. This restriction may be relaxed in the
+// future.
+ .globl bad_orr_not_xzr
+ .type bad_orr_not_xzr, @function
+bad_orr_not_xzr:
+// CHECK-LABEL: GS-PAUTH: non-protected call found in function bad_orr_not_xzr, basic block {{[^,]+}}, at address
+// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: br x2 # TAILCALL
+// CHECK-NEXT: The 1 instructions that write to the affected registers after any authentication are:
+// CHECK-NEXT: 1. {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: This happens in the following basic block:
+// CHECK-NEXT: {{[0-9a-f]+}}: adrp x0, #{{(0x)?[0-9a-f]+}}
+// CHECK-NEXT: {{[0-9a-f]+}}: mov x1, #0
+// CHECK-NEXT: {{[0-9a-f]+}}: orr x2, x1, x0
+// CHECK-NEXT: {{[0-9a-f]+}}: br x2 # TAILCALL
+ adrp x0, sym
+ movz x1, #0
+ orr x2, x1, x0
+ br x2
+ .size bad_orr_not_xzr, .-bad_orr_not_xzr
----------------
atrosinenko wrote:
IIRC this test case checks that only one very specific usage of `orr` is permitted so far, namely `mov Xd, Xm`, which is expanded into `orr Xd, XZR, Xm`. For that purpose, setting `x1` to an otherwise safe value was probably intentional: `orr Xd, Xn, Xm` should only be supported with `Xn == XZR`, not with any other register, even if it is "safe".
I should probably add an explicit comment describing the intention. It may be worth relaxing this in the future, of course.
https://github.com/llvm/llvm-project/pull/132540
More information about the llvm-commits
mailing list