[llvm] [BOLT][AArch64] Implement PLTCall optimization (PR #93584)

Paschalis Mpeis via llvm-commits llvm-commits at lists.llvm.org
Tue May 28 12:03:57 PDT 2024


https://github.com/paschalis-mpeis updated https://github.com/llvm/llvm-project/pull/93584

>From 206b4c90c03372ad0b0de04c520689baafc9bdc5 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis <Paschalis.Mpeis at arm.com>
Date: Fri, 17 May 2024 14:27:15 +0100
Subject: [PATCH] [BOLT][AArch64] Implement PLTCall optimization

`convertCallToIndirectCall` applies the PLTCall optimization and, if
needed, updates the iterator it is given so that it points to the converted
call instruction. Since AArch64 needs to inject additional instructions to
implement this pass, the relevant BasicBlock and an iterator are passed to
`convertCallToIndirectCall`.

`NumCallsOptimized` is updated only on successful application of the pass.

Tests:
- Inputs/plt-tailcall.c: an example of a tail-call-optimized PLT call.
- AArch64/plt-call.test: the actual AArch64 test, which runs the PLTCall
  optimization on the above input file and verifies that the pass is
  applied to the calls to 'printf' and 'puts'.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        | 13 ++++--
 bolt/lib/Passes/PLTCall.cpp                   | 18 +++++---
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 46 +++++++++++++++++++
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp      |  5 +-
 bolt/test/AArch64/plt-call.test               | 16 +++++++
 bolt/test/Inputs/plt-tailcall.c               |  8 ++++
 6 files changed, 95 insertions(+), 11 deletions(-)
 create mode 100644 bolt/test/AArch64/plt-call.test
 create mode 100644 bolt/test/Inputs/plt-tailcall.c

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f7614cf9ac977..01be123894869 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -14,6 +14,7 @@
 #ifndef BOLT_CORE_MCPLUSBUILDER_H
 #define BOLT_CORE_MCPLUSBUILDER_H
 
+#include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/MCPlus.h"
 #include "bolt/Core/Relocation.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -1412,9 +1413,15 @@ class MCPlusBuilder {
     return false;
   }
 
-  /// Modify a direct call instruction \p Inst with an indirect call taking
-  /// a destination from a memory location pointed by \p TargetLocation symbol.
-  virtual bool convertCallToIndirectCall(MCInst &Inst,
+  /// Replace the direct call instruction pointed to by the iterator \p It with
+  /// an indirect call that takes its destination from the memory location
+  /// pointed to by the \p TargetLocation symbol. If additional instructions
+  /// need to be prepended before \p It, then the iterator must be updated to
+  /// point to the indirect call instruction.
+  ///
+  /// \return true on success
+  virtual bool convertCallToIndirectCall(BinaryBasicBlock &BB,
+                                         BinaryBasicBlock::iterator &It,
                                          const MCSymbol *TargetLocation,
                                          MCContext *Ctx) {
     llvm_unreachable("not implemented");
diff --git a/bolt/lib/Passes/PLTCall.cpp b/bolt/lib/Passes/PLTCall.cpp
index d0276f22e14ef..00e47ea2b25e7 100644
--- a/bolt/lib/Passes/PLTCall.cpp
+++ b/bolt/lib/Passes/PLTCall.cpp
@@ -61,19 +61,23 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
       if (opts::PLT == OT_HOT && !BB.getKnownExecutionCount())
         continue;
 
-      for (MCInst &Instr : BB) {
-        if (!BC.MIB->isCall(Instr))
+      for (auto It = BB.begin(); It != BB.end(); It++) {
+        if (!BC.MIB->isCall(*It))
           continue;
-        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
+        const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(*It);
         if (!CallSymbol)
           continue;
         const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
         if (!CalleeBF || !CalleeBF->isPLTFunction())
           continue;
-        BC.MIB->convertCallToIndirectCall(Instr, CalleeBF->getPLTSymbol(),
-                                          BC.Ctx.get());
-        BC.MIB->addAnnotation(Instr, "PLTCall", true);
-        ++NumCallsOptimized;
+        if (BC.MIB->convertCallToIndirectCall(BB, It, CalleeBF->getPLTSymbol(),
+                                              BC.Ctx.get())) {
+          assert(BC.MIB->isCall(*It) &&
+                 "Iterator must point to the optimized call");
+
+          BC.MIB->addAnnotation(*It, "PLTCall", true);
+          ++NumCallsOptimized;
+        }
       }
     }
   }
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 0ae9d3668b93b..13ce917059532 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1055,6 +1055,52 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
+  bool convertCallToIndirectCall(BinaryBasicBlock &BB,
+                                 BinaryBasicBlock::iterator &It,
+                                 const MCSymbol *TargetLocation,
+                                 MCContext *Ctx) override {
+    // Generated code:
+    //   adrp x16, <symbol>
+    //   ldr  x17, [x16, #<offset>]
+    //   bl <label> -> blr x17  (or convert 'b -> br' for tail calls)
+
+    MCInst &InstCall = *It;
+    const bool IsTailCall = isTailCall(InstCall);
+    assert((InstCall.getOpcode() == AArch64::BL ||
+            (InstCall.getOpcode() == AArch64::B && IsTailCall)) &&
+           "64-bit direct (tail) call instruction expected");
+
+    // Convert the call to an indirect one by modifying the instruction.
+    InstCall.clear();
+    InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
+    InstCall.addOperand(MCOperand::createReg(AArch64::X17));
+    if (IsTailCall)
+      setTailCall(InstCall);
+
+    // Prepend instructions to load PLT call address from the input symbol.
+    // The load is inserted first so that the ADRP ends up in front of it.
+    MCInst InstLoad;
+    InstLoad.setOpcode(AArch64::LDRXui);
+    InstLoad.addOperand(MCOperand::createReg(AArch64::X17));
+    InstLoad.addOperand(MCOperand::createReg(AArch64::X16));
+    InstLoad.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstLoad, /* OpNum */ 2, TargetLocation,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+    It = BB.insertInstruction(It, InstLoad);
+
+    MCInst InstAdrp;
+    InstAdrp.setOpcode(AArch64::ADRP);
+    InstAdrp.addOperand(MCOperand::createReg(AArch64::X16));
+    InstAdrp.addOperand(MCOperand::createImm(0));
+    setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, TargetLocation,
+                          /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+    It = BB.insertInstruction(It, InstAdrp);
+
+    // Skip the prepended ADRP and LDR so It refers to the call again.
+    It = It + 2;
+    return true;
+  }
+
   bool lowerTailCall(MCInst &Inst) override {
     removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
     if (getConditionalTailCall(Inst))
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 8b1894953f375..ca85600ea4363 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -1639,8 +1639,11 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     return true;
   }
 
-  bool convertCallToIndirectCall(MCInst &Inst, const MCSymbol *TargetLocation,
+  bool convertCallToIndirectCall(BinaryBasicBlock &BB,
+                                 BinaryBasicBlock::iterator &It,
+                                 const MCSymbol *TargetLocation,
                                  MCContext *Ctx) override {
+    MCInst &Inst = (*It);
     assert((Inst.getOpcode() == X86::CALL64pcrel32 ||
             (Inst.getOpcode() == X86::JMP_4 && isTailCall(Inst))) &&
            "64-bit direct (tail) call instruction expected");
diff --git a/bolt/test/AArch64/plt-call.test b/bolt/test/AArch64/plt-call.test
new file mode 100644
index 0000000000000..c986808672bc0
--- /dev/null
+++ b/bolt/test/AArch64/plt-call.test
@@ -0,0 +1,16 @@
+// Verify that PLTCall optimization works, including when PLT calls were
+// tail-call optimized.
+
+RUN: %clang %cflags %p/../Inputs/plt-tailcall.c \
+RUN:    -o %t -Wl,-q
+RUN: llvm-bolt %t -o %t.bolt --plt=all --print-plt  --print-only=foo | FileCheck %s
+
+// Call to printf
+CHECK: adrp	x16, printf@GOT
+CHECK: ldr	x17, [x16, :lo12:printf@GOT]
+CHECK: blr	x17 # PLTCall: 1
+
+// Call to puts, that was tail-call optimized
+CHECK: adrp	x16, puts@GOT
+CHECK: ldr	x17, [x16, :lo12:puts@GOT]
+CHECK: br	x17 # TAILCALL  # PLTCall: 1
diff --git a/bolt/test/Inputs/plt-tailcall.c b/bolt/test/Inputs/plt-tailcall.c
new file mode 100644
index 0000000000000..13f6e29c60774
--- /dev/null
+++ b/bolt/test/Inputs/plt-tailcall.c
@@ -0,0 +1,8 @@
+#include "stub.h"
+
+int foo(char *c) { // one regular call and one tail call for the PLTCall test
+  printf(""); // regular call; plt-call.test expects it rewritten to blr
+  __attribute__((musttail)) return puts(c); // musttail forces a tail call
+}
+
+int main() { return foo("a"); } // entry point; only foo is inspected



More information about the llvm-commits mailing list