[llvm] support `inline-small-functions` for AArch64 (PR #120187)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 22:20:29 PST 2024
https://github.com/liusy58 created https://github.com/llvm/llvm-project/pull/120187
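Add the `MCPlusBuilder` overrides that the BOLT inliner relies on (`getStackPointer`, `isPush`, `isPop`, `createCall`, and `convertTailCallToCall`) to `AArch64MCPlusBuilder.cpp`, so that `--inline-small-functions` works on AArch64.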
>From 132461cadf930b8d0f7e0bd410178df7b477d1ed Mon Sep 17 00:00:00 2001
From: liusy58 <liusy58 at linux.alibaba.com>
Date: Thu, 12 Dec 2024 19:33:36 +0800
Subject: [PATCH] support inline-small-functions for AArch64
---
bolt/lib/Passes/Inliner.cpp | 4 +-
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 28 +++++++++
bolt/test/AArch64/inline-test.s | 57 +++++++++++++++++++
3 files changed, 87 insertions(+), 2 deletions(-)
create mode 100644 bolt/test/AArch64/inline-test.s
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index f004a8eeea185b..1793f4ff1f1480 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
if (MIB.isPseudo(Inst))
continue;
- MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
+ MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());
// Fix branch target. Strictly speaking, we don't have to do this as
// targets of direct branches will be fixed later and don't matter
// in the CFG state. However, disassembly may look misleading, and
// hence we do the fixing.
- if (MIB.isBranch(Inst)) {
+ if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
assert(!MIB.isIndirectBranch(Inst) &&
"unexpected indirect branch in callee");
const BinaryBasicBlock *TargetBB =
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7e08e5c81d26ff..0722b8ae0cb2c9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -133,6 +133,34 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+ bool isPush(const MCInst &Inst) const override { return false; }
+
+ bool isPop(const MCInst &Inst) const override { return false; }
+
+ void createCall(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ createDirectCall(Inst, Target, Ctx, false);
+ }
+
+ bool convertTailCallToCall(MCInst &Inst) override {
+ int NewOpcode;
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case AArch64::B:
+ NewOpcode = AArch64::BL;
+ break;
+ case AArch64::BR:
+ NewOpcode = AArch64::BLR;
+ break;
+ }
+ Inst.setOpcode(NewOpcode);
+ removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
+ return true;
+ }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
diff --git a/bolt/test/AArch64/inline-test.s b/bolt/test/AArch64/inline-test.s
new file mode 100644
index 00000000000000..ec33f735163899
--- /dev/null
+++ b/bolt/test/AArch64/inline-test.s
@@ -0,0 +1,57 @@
+# This test checks that BOLT correctly inlines small functions on AArch64.
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux-gnu %s -o %t.o
+# RUN: %clang --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A -debug-only=bolt-inliner %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+
+ .text
+ .globl _Z3fooP1A // -- Begin function _Z3fooP1A
+ .p2align 2
+ .type _Z3fooP1A, at function
+_Z3fooP1A: // @_Z3fooP1A
+ .cfi_startproc
+ ldr x8, [x0]
+ ldr w0, [x8]
+ ret
+.Lfunc_end0:
+ .size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A
+ .cfi_endproc
+ .globl _Z3barP1A // -- Begin function _Z3barP1A
+ .p2align 2
+ .type _Z3barP1A, at function
+_Z3barP1A: // @_Z3barP1A
+ .cfi_startproc
+ stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+ .cfi_def_cfa_offset 16
+ mov x29, sp
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+.Lfunc_end1:
+ .size _Z3barP1A, .Lfunc_end1-_Z3barP1A
+ .cfi_endproc
+ .globl main // -- Begin function main
+ .p2align 2
+ .type main, at function
+main: // @main
+ .cfi_startproc
+ mov w0, wzr
+ ret
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
+ .cfi_endproc
+ .section ".note.GNU-stack","", at progbits
+ .addrsig
\ No newline at end of file
More information about the llvm-commits
mailing list