[llvm] support `inline-small-functions` for AArch64 (PR #120187)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 16 22:20:29 PST 2024


https://github.com/liusy58 created https://github.com/llvm/llvm-project/pull/120187

Add some functions in `AArch64MCPlusBuilder.cpp` to support inlining on AArch64.

>From 132461cadf930b8d0f7e0bd410178df7b477d1ed Mon Sep 17 00:00:00 2001
From: liusy58 <liusy58 at linux.alibaba.com>
Date: Thu, 12 Dec 2024 19:33:36 +0800
Subject: [PATCH] support inline-small-functions for AArch64

---
 bolt/lib/Passes/Inliner.cpp                   |  4 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 28 +++++++++
 bolt/test/AArch64/inline-test.s               | 57 +++++++++++++++++++
 3 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 bolt/test/AArch64/inline-test.s

diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index f004a8eeea185b..1793f4ff1f1480 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
       if (MIB.isPseudo(Inst))
         continue;
 
-      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
+      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());
 
       // Fix branch target. Strictly speaking, we don't have to do this as
       // targets of direct branches will be fixed later and don't matter
       // in the CFG state. However, disassembly may look misleading, and
       // hence we do the fixing.
-      if (MIB.isBranch(Inst)) {
+      if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
         assert(!MIB.isIndirectBranch(Inst) &&
                "unexpected indirect branch in callee");
         const BinaryBasicBlock *TargetBB =
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7e08e5c81d26ff..0722b8ae0cb2c9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -133,6 +133,34 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 public:
   using MCPlusBuilder::MCPlusBuilder;
 
+  MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+  bool isPush(const MCInst &Inst) const override { return false; }
+
+  bool isPop(const MCInst &Inst) const override { return false; }
+
+  void createCall(MCInst &Inst, const MCSymbol *Target,
+                  MCContext *Ctx) override {
+    createDirectCall(Inst, Target, Ctx, false);
+  }
+
+  bool convertTailCallToCall(MCInst &Inst) override {
+    int NewOpcode;
+    switch (Inst.getOpcode()) {
+    default:
+      return false;
+    case AArch64::B:
+      NewOpcode = AArch64::BL;
+      break;
+    case AArch64::BR:
+      NewOpcode = AArch64::BLR;
+      break;
+    }
+    Inst.setOpcode(NewOpcode);
+    removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
+    return true;
+  }
+
   bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
               CompFuncTy Comp) const override {
     const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
diff --git a/bolt/test/AArch64/inline-test.s b/bolt/test/AArch64/inline-test.s
new file mode 100644
index 00000000000000..ec33f735163899
--- /dev/null
+++ b/bolt/test/AArch64/inline-test.s
@@ -0,0 +1,57 @@
+# This test checks that inlining is properly handled by BOLT on AArch64.
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux-gnu  %s -o %t.o
+# RUN: %clang --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-small-functions --print-inline  --print-only=_Z3barP1A  -debug-only=bolt-inliner %t.exe -o %t.bolt  | FileCheck %s
+ 
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl	_Z3fooP1A
+# CHECK: ldr	x8, [x0]
+# CHECK-NEXT: ldr	w0, [x8]
+ 
+	.text
+	.globl	_Z3fooP1A                       // -- Begin function _Z3fooP1A
+	.p2align	2
+	.type	_Z3fooP1A,@function
+_Z3fooP1A:                              // @_Z3fooP1A
+	.cfi_startproc
+	ldr	x8, [x0]
+	ldr	w0, [x8]
+	ret
+.Lfunc_end0:
+	.size	_Z3fooP1A, .Lfunc_end0-_Z3fooP1A
+	.cfi_endproc
+	.globl	_Z3barP1A                       // -- Begin function _Z3barP1A
+	.p2align	2
+	.type	_Z3barP1A,@function
+_Z3barP1A:                              // @_Z3barP1A
+	.cfi_startproc
+	stp	x29, x30, [sp, #-16]!           // 16-byte Folded Spill
+	.cfi_def_cfa_offset 16
+	mov	x29, sp
+	.cfi_def_cfa w29, 16
+	.cfi_offset w30, -8
+	.cfi_offset w29, -16
+	bl	_Z3fooP1A
+	mul	w0, w0, w0
+	.cfi_def_cfa wsp, 16
+	ldp	x29, x30, [sp], #16             // 16-byte Folded Reload
+	.cfi_def_cfa_offset 0
+	.cfi_restore w30
+	.cfi_restore w29
+	ret
+.Lfunc_end1:
+	.size	_Z3barP1A, .Lfunc_end1-_Z3barP1A
+	.cfi_endproc
+	.globl	main                            // -- Begin function main
+	.p2align	2
+	.type	main,@function
+main:                                   // @main
+	.cfi_startproc
+	mov	w0, wzr
+	ret
+.Lfunc_end2:
+	.size	main, .Lfunc_end2-main
+	.cfi_endproc
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
\ No newline at end of file



More information about the llvm-commits mailing list