[llvm] [BOLT] Extend Inliner to work on functions with Pointer Authentication (PR #162458)
Gergely Bálint via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 05:04:59 PST 2025
https://github.com/bgergely0 updated https://github.com/llvm/llvm-project/pull/162458
>From 1c5251fe030925a51ecdc0db4cc81da2be5abe61 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 8 Oct 2025 11:17:31 +0000
Subject: [PATCH 1/4] [BOLT] Extend Inliner to work on functions with Pointer
Authentication
The inliner uses DirectSP to check if a function has instructions that
modify the SP. Exceptions are stack Push and Pop instructions.
We can also allow pointer signing and authentication instructions.
The inliner removes the Return instructions from the inlined functions.
If it is a fused pointer-authentication-and-return (e.g. RETAA), we have
to generate a new authentication instruction in place of the Return.
---
bolt/include/bolt/Core/MCPlusBuilder.h | 6 +++
bolt/lib/Passes/Inliner.cpp | 18 ++++++++
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 29 ++++++++++++
bolt/test/AArch64/inline-armv8.3-returns.s | 45 +++++++++++++++++++
4 files changed, 98 insertions(+)
create mode 100644 bolt/test/AArch64/inline-armv8.3-returns.s
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 69ae4fb8ddcc9..f543d2e5d79a1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -632,6 +632,12 @@ class MCPlusBuilder {
return false;
}
+ /// Generate the matching pointer authentication instruction from a fused
+ /// pauth-and-return instruction.
+ virtual void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) {
+ llvm_unreachable("not implemented");
+ }
+
/// Returns the register used as a return address. Returns std::nullopt if
/// not applicable, such as reading the return address from a system register
/// or from the stack.
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 9b28c7efde5bf..913ff3d554a5b 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) {
if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
continue;
+ // Pointer signing and authentication instructions are used around
+ // Push and Pop. These are also straightforward to handle.
+ if (BC.isAArch64() &&
+ (BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) ||
+ BC.MIB->isPAuthAndRet(Inst)))
+ continue;
+
DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
BC.MIB->hasUseOfPhysReg(Inst, SPReg);
}
@@ -338,6 +345,17 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
BC.Ctx.get());
}
+ // Handling fused authentication and return instructions (Armv8.3-A):
+ // if the Return here is RETA(A|B), we have to keep the authentication
+ // part.
+ // RETAA -> AUTIASP + RET
+ // RETAB -> AUTIBSP + RET
+ if (BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
+ MCInst Auth;
+ BC.MIB->createMatchingAuth(Inst, Auth);
+ InsertII =
+ std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth)));
+ }
if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
InsertII =
std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index db3989d6b0b5f..38b2ca710c539 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -313,6 +313,35 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
Inst.getOpcode() == AArch64::RETABSPPCr;
}
+ void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override {
+ assert(isPAuthAndRet(AuthAndRet) &&
+ "Not a fused pauth-and-return instruction");
+
+ Auth.clear();
+ switch (AuthAndRet.getOpcode()) {
+ case AArch64::RETAA:
+ Auth.setOpcode(AArch64::AUTIASP);
+ break;
+ case AArch64::RETAB:
+ Auth.setOpcode(AArch64::AUTIBSP);
+ break;
+ case AArch64::RETAASPPCi:
+ Auth.setOpcode(AArch64::AUTIASPPCi);
+ break;
+ case AArch64::RETABSPPCi:
+ Auth.setOpcode(AArch64::AUTIBSPPCi);
+ break;
+ case AArch64::RETAASPPCr:
+ Auth.setOpcode(AArch64::AUTIASPPCr);
+ break;
+ case AArch64::RETABSPPCr:
+ Auth.setOpcode(AArch64::AUTIBSPPCr);
+ break;
+ default:
+ llvm_unreachable("Unhandled fused pauth-and-return instruction");
+ }
+ }
+
std::optional<MCPhysReg> getSignedReg(const MCInst &Inst) const override {
switch (Inst.getOpcode()) {
case AArch64::PACIA:
diff --git a/bolt/test/AArch64/inline-armv8.3-returns.s b/bolt/test/AArch64/inline-armv8.3-returns.s
new file mode 100644
index 0000000000000..055b589476caf
--- /dev/null
+++ b/bolt/test/AArch64/inline-armv8.3-returns.s
@@ -0,0 +1,45 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions is properly handled by BOLT.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+# CHECK-NEXT: autiasp
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A, at function
+_Z3fooP1A:
+ paciasp
+ ldr x8, [x0]
+ ldr w0, [x8]
+ retaa
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A, at function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ ldp x29, x30, [sp], #16
+ ret
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main, at function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
>From 754a053c3de66c2af1fae5f722f70a294b01bf55 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Tue, 18 Nov 2025 10:11:46 +0000
Subject: [PATCH 2/4] [BOLT] Fix when inlining into a context with a tailcall
When inlining to a call site with a tailcall, the return in the inlined
block does not get removed. Because of this, we don't have to generate
the matching authentication.
Add test for this case.
---
bolt/lib/Passes/Inliner.cpp | 7 ++--
bolt/test/AArch64/inline-armv8.3-tailcall.s | 46 +++++++++++++++++++++
2 files changed, 50 insertions(+), 3 deletions(-)
create mode 100644 bolt/test/AArch64/inline-armv8.3-tailcall.s
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 913ff3d554a5b..b83946f7b34b5 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -346,11 +346,12 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
}
// Handling fused authentication and return instructions (Armv8.3-A):
- // if the Return here is RETA(A|B), we have to keep the authentication
- // part.
+ // if the call site is not a tailcall, the return will be removed
+ // from the inlined block. If that return is RETA(A|B), we have to keep
+ // the authentication part.
// RETAA -> AUTIASP + RET
// RETAB -> AUTIBSP + RET
- if (BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
+ if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) {
MCInst Auth;
BC.MIB->createMatchingAuth(Inst, Auth);
InsertII =
diff --git a/bolt/test/AArch64/inline-armv8.3-tailcall.s b/bolt/test/AArch64/inline-armv8.3-tailcall.s
new file mode 100644
index 0000000000000..78e7285fbf101
--- /dev/null
+++ b/bolt/test/AArch64/inline-armv8.3-tailcall.s
@@ -0,0 +1,46 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions into a location with a tailcall is properly handled by BOLT.
+# Because _Z3barP1A ends in a tailcall, we don't remove the return instruction
+# from the inlined block. Therefore, we should see a retaa, and not an autiasp.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 12 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: mov x29, sp
+# CHECK-NEXT: paciasp
+# CHECK-NEXT: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+# CHECK-NEXT: retaa
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A, at function
+_Z3fooP1A:
+ paciasp
+ ldr x8, [x0]
+ ldr w0, [x8]
+ retaa
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A, at function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ b _Z3fooP1A // tailcall
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main, at function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
>From d0943f10362603834040fb6e7e47d3d4ebfd4c34 Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 19 Nov 2025 10:09:41 +0000
Subject: [PATCH 3/4] [BOLT] Fix: copy operands of MCInst in createMatchingAuth
- some PAuthAndRet variants need operands, so we need to copy them from
the to-be-removed MCInst to the new one
- remove extra assertion
- add unittest about inlining an Armv9.5-A PAuthAndRet variant (with the
operand copy).
---
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 4 +-
bolt/test/AArch64/inline-pauth-lr.s | 45 +++++++++++++++++++
2 files changed, 46 insertions(+), 3 deletions(-)
create mode 100644 bolt/test/AArch64/inline-pauth-lr.s
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 38b2ca710c539..d4fd4b78f382d 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -314,10 +314,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override {
- assert(isPAuthAndRet(AuthAndRet) &&
- "Not a fused pauth-and-return instruction");
-
Auth.clear();
+ Auth.setOperands(AuthAndRet.getOperands());
switch (AuthAndRet.getOpcode()) {
case AArch64::RETAA:
Auth.setOpcode(AArch64::AUTIASP);
diff --git a/bolt/test/AArch64/inline-pauth-lr.s b/bolt/test/AArch64/inline-pauth-lr.s
new file mode 100644
index 0000000000000..0ec0ab893bbef
--- /dev/null
+++ b/bolt/test/AArch64/inline-pauth-lr.s
@@ -0,0 +1,45 @@
+# This test checks that inlining functions with fused pointer-auth-and-return
+# instructions is properly handled by BOLT.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -march=armv9.5-a+pauth-lr -O0 %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
+# RUN: %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: paciasppc
+# CHECK-NEXT: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+# CHECK-NEXT: autiasppcr x28
+
+ .text
+ .globl _Z3fooP1A
+ .type _Z3fooP1A, at function
+_Z3fooP1A:
+ paciasppc
+ ldr x8, [x0]
+ ldr w0, [x8]
+ retaasppcr x28
+ .size _Z3fooP1A, .-_Z3fooP1A
+
+ .globl _Z3barP1A
+ .type _Z3barP1A, at function
+_Z3barP1A:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ ldp x29, x30, [sp], #16
+ ret
+ .size _Z3barP1A, .-_Z3barP1A
+
+ .globl main
+ .p2align 2
+ .type main, at function
+main:
+ mov w0, wzr
+ ret
+ .size main, .-main
>From 267c93a90176ce20a6263e7995d4dcae9d0fc84b Mon Sep 17 00:00:00 2001
From: Gergely Balint <gergely.balint at arm.com>
Date: Wed, 19 Nov 2025 13:02:23 +0000
Subject: [PATCH 4/4] [BOLT] Add unittest for inliner using retaasppc <label>
---
bolt/test/AArch64/inline-pauth-lr.s | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/bolt/test/AArch64/inline-pauth-lr.s b/bolt/test/AArch64/inline-pauth-lr.s
index 0ec0ab893bbef..34f05721d5ea0 100644
--- a/bolt/test/AArch64/inline-pauth-lr.s
+++ b/bolt/test/AArch64/inline-pauth-lr.s
@@ -1,5 +1,5 @@
-# This test checks that inlining functions with fused pointer-auth-and-return
-# instructions is properly handled by BOLT.
+# This test checks that inlining functions with the pauth-lr variants of
+# fused pointer-auth-and-return instructions is properly handled by BOLT.
# REQUIRES: system-linux
@@ -7,13 +7,17 @@
# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \
# RUN: %t.exe -o %t.bolt | FileCheck %s
-# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes.
+# CHECK: BOLT-INFO: inlined 0 calls at 2 call sites in 2 iteration(s). Change in binary size: 16 bytes.
# CHECK: Binary Function "_Z3barP1A" after inlining {
# CHECK-NOT: bl _Z3fooP1A
# CHECK: paciasppc
# CHECK-NEXT: ldr x8, [x0]
# CHECK-NEXT: ldr w0, [x8]
# CHECK-NEXT: autiasppcr x28
+# CHECK-NEXT: paciasppc
+# CHECK-NEXT: ldr x7, [x0]
+# CHECK-NEXT: ldr w0, [x7]
+# CHECK-NEXT: autiasppc _Z3bazP1A
.text
.globl _Z3fooP1A
@@ -25,12 +29,24 @@ _Z3fooP1A:
retaasppcr x28
.size _Z3fooP1A, .-_Z3fooP1A
+ .text
+ .globl _Z3bazP1A
+ .type _Z3bazP1A, at function
+_Z3bazP1A:
+0:
+ paciasppc
+ ldr x7, [x0]
+ ldr w0, [x7]
+ retaasppc 0b
+ .size _Z3bazP1A, .-_Z3bazP1A
+
.globl _Z3barP1A
.type _Z3barP1A, at function
_Z3barP1A:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl _Z3fooP1A
+ bl _Z3bazP1A
mul w0, w0, w0
ldp x29, x30, [sp], #16
ret
More information about the llvm-commits
mailing list