[llvm] [bolt][aarch64] Change indirect call instrumentation snippet (PR #180229)
Alexey Moksyakov via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 11 05:47:38 PST 2026
https://github.com/yavtuk updated https://github.com/llvm/llvm-project/pull/180229
>From a954a633b83d9c880f5c4b2f8dcd2579c403b627 Mon Sep 17 00:00:00 2001
From: Alexey Moksyakov <moksyakov.alexey at huawei.com>
Date: Thu, 27 Nov 2025 23:48:10 +0300
Subject: [PATCH 1/2] [bolt][aarch64] Change indirect call instrumentation
snippet
The indirect call instrumentation snippet uses the x16 register in the exit
handler to branch to the destination target:
__bolt_instr_ind_call_handler:
msr nzcv, x1
ldp x0, x1, [sp], #16
ldr x16, [sp], #16
ldp x0, x1, [sp], #16
br x16 <-----
This patch changes the instrumentation snippet so that it calls the
instrumentation runtime library through an indirect call instruction, and adds
a wrapper that saves and restores the target value and the register used by
the original indirect call instruction.
Example:
mov x16, foo
indirectCall:
adrp x8, Label
add x8, x8, #:lo12:Label
blr x8
Before:
Instrumented indirect call:
stp x0, x1, [sp, #-16]!
mov x0, x8
movk x1, #0x0, lsl #48
movk x1, #0x0, lsl #32
movk x1, #0x0, lsl #16
movk x1, #0x0
stp x0, x1, [sp, #-16]!
adrp x0, __bolt_instr_ind_call_handler_func
add x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
blr x0
__bolt_instr_ind_call_handler: (exit snippet)
msr nzcv, x1
ldp x0, x1, [sp], #16
ldr x16, [sp], #16
ldp x0, x1, [sp], #16
br x16 <- overwrites the original value in X16
__bolt_instr_ind_call_handler_func: (entry snippet)
stp x0, x1, [sp, #-16]!
mrs x1, nzcv
adrp x0, __bolt_instr_ind_call_handler
add x0, x0, #:lo12:__bolt_instr_ind_call_handler
ldr x0, [x0]
cmp x0, #0x0
b.eq __bolt_instr_ind_call_handler
str x30, [sp, #-16]!
blr x0 <--- runtime lib store/load all regs
ldr x30, [sp], #16
b __bolt_instr_ind_call_handler
_________________________________________________________________________
After:
mov x16, foo
indirectCall:
adrp x8, Label
add x8, x8, #:lo12:Label
blr x8
Instrumented indirect call:
stp x0, x1, [sp, #-16]!
mov x0, x8
movk x1, #0x0, lsl #48
movk x1, #0x0, lsl #32
movk x1, #0x0, lsl #16
movk x1, #0x0
stp x0, x1, [sp, #-16]!
str x30, [sp, #-16]!
adrp x8, __bolt_instr_ind_call_handler_func
add x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
blr x8 <--- call trampoline instr lib
ldr x30, [sp], #16
ldr x8, [sp], #16
ldp x0, x1, [sp], #16
blr x8 <--- original indirect call instruction
// don't touch regs besides x0, x1
__bolt_instr_ind_call_handler: (exit snippet)
ret <---- return to original function with indirect call
__bolt_instr_ind_call_handler_func: (entry snippet)
adrp x0, __bolt_instr_ind_call_handler
add x0, x0, #:lo12:__bolt_instr_ind_call_handler
ldr x0, [x0]
cmp x0, #0x0
b.eq __bolt_instr_ind_call_handler
str x30, [sp, #-16]!
blr x0 <--- runtime lib store/load all regs
ldr x30, [sp], #16
b __bolt_instr_ind_call_handler
---
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 144 +++++++++++-------
bolt/runtime/instr.cpp | 6 +
bolt/runtime/sys_aarch64.h | 6 +-
.../AArch64/instrumentation-ind-call.c | 57 ++++++-
5 files changed, 160 insertions(+), 58 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e571e91d85135..595e6d66da90e 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -545,6 +545,11 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) {
+ llvm_unreachable("not implemented");
+ }
+
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index aa5cf3c671cdc..a6bbd9e875e30 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
namespace {
-static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
Inst.setOpcode(AArch64::MRS);
Inst.clear();
Inst.addOperand(MCOperand::createReg(RegName));
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
}
-static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
Inst.setOpcode(AArch64::MSR);
Inst.clear();
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2178,6 +2178,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
convertJmpToTailCall(Inst);
}
+ void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ Inst.setOpcode(AArch64::B);
+ Inst.clear();
+ Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
+ Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
+ }
+
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch,
@@ -2535,21 +2543,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
- InstructionListType Insts(5);
// Code sequence for instrumented indirect call handler:
- // msr nzcv, x1
- // ldp x0, x1, [sp], #16
- // ldr x16, [sp], #16
- // ldp x0, x1, [sp], #16
- // br x16
- setSystemFlag(Insts[0], AArch64::X1);
- createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
- // Here we load address of the next function which should be called in the
- // original binary to X16 register. Writing to X16 is permitted without
- // needing to restore.
- loadReg(Insts[2], AArch64::X16, AArch64::SP);
- createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
- createIndirectBranch(Insts[4], AArch64::X16, 0);
+ // ret
+
+ InstructionListType Insts;
+
+ Insts.emplace_back();
+ createReturn(Insts.back());
+
return Insts;
}
@@ -2625,39 +2626,62 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
MCSymbol *HandlerFuncAddr,
int CallSiteID,
MCContext *Ctx) override {
- InstructionListType Insts;
// Code sequence used to enter indirect call instrumentation helper:
- // stp x0, x1, [sp, #-16]! createPushRegisters
- // mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
+ // stp x0, x1, [sp, #-16]! createPushRegisters (1)
+ // mov target, x0 convertIndirectCallToLoad -> orr x0 target xzr
// mov x1 CallSiteID createLoadImmediate ->
// movk x1, #0x0, lsl #48
// movk x1, #0x0, lsl #32
// movk x1, #0x0, lsl #16
// movk x1, #0x0
- // stp x0, x1, [sp, #-16]!
- // bl *HandlerFuncAddr createIndirectCall ->
+ // stp x0, x1, [sp, #-16]! (2)
+ // str x30, [sp, #-16]! (3)
// adr x0 *HandlerFuncAddr -> adrp + add
- // blr x0
+ // blr x0 (__bolt_instr_ind_call_handler_func)
+ // ldr x30, [sp], #16 (3)
+ // ldr target ; restore target value (2)
+ // ldp x0, x1, [sp], #16 (1)
+ // blr target
+
+ InstructionListType Insts;
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+ createPushRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
Insts.emplace_back(CallInst);
- convertIndirectCallToLoad(Insts.back(), AArch64::X0);
+ convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
InstructionListType LoadImm =
createLoadImmediate(getIntArgRegister(1), CallSiteID);
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+ createPushRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
+ Insts.emplace_back();
+ storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
Insts.resize(Insts.size() + 2);
- InstructionListType Addr =
- materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
+ InstructionListType Addr = materializeAddress(
+ HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
assert(Addr.size() == 2 && "Invalid Addr size");
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
+
+ Insts.emplace_back();
+ createIndirectCallInst(Insts.back(), false,
+ CallInst.getOperand(0).getReg());
+
Insts.emplace_back();
- createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
+ loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
- // Carry over metadata including tail call marker if present.
- stripAnnotations(Insts.back());
- moveAnnotations(std::move(CallInst), Insts.back());
+ Insts.emplace_back();
+ createPopRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
+
+ Insts.emplace_back();
+ loadReg(Insts.back(), CallInst.getOperand(0).getReg(), AArch64::SP);
+
+ Insts.emplace_back();
+ createPopRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
+
+ Insts.emplace_back(CallInst);
return Insts;
}
@@ -2666,12 +2690,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
const MCSymbol *IndCallHandler,
MCContext *Ctx) override {
- // Code sequence used to check whether InstrTampoline was initialized
+ // Code sequence used to check whether InstrTrampoline was initialized
// and call it if so, returns via IndCallHandler
- // stp x0, x1, [sp, #-16]!
- // mrs x1, nzcv
- // adr x0, InstrTrampoline -> adrp + add
- // ldr x0, [x0]
+ // adrp x0, InstrTrampoline
+ // ldr x0, [x0, #lo12:InstrTrampoline]
// subs x0, x0, #0x0
// b.eq IndCallHandler
// str x30, [sp, #-16]!
@@ -2679,30 +2701,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// ldr x30, [sp], #16
// b IndCallHandler
InstructionListType Insts;
+
+ // load handler address
+ MCInst InstAdrp;
+ InstAdrp.setOpcode(AArch64::ADRP);
+ InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstAdrp.addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
+ /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+ Insts.emplace_back(InstAdrp);
+
+ MCInst InstLoad;
+ InstLoad.setOpcode(AArch64::LDRXui);
+ InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstLoad.addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
+ /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+ Insts.emplace_back(InstLoad);
+
+ InstructionListType CmpJmp =
+ createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
+ Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
+
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
- Insts.emplace_back();
- getSystemFlag(Insts.back(), getIntArgRegister(1));
- Insts.emplace_back();
- Insts.emplace_back();
- InstructionListType Addr =
- materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
- std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
- assert(Addr.size() == 2 && "Invalid Addr size");
- Insts.emplace_back();
- loadReg(Insts.back(), AArch64::X0, AArch64::X0);
- InstructionListType cmpJmp =
- createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
- Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
- Insts.emplace_back();
- storeReg(Insts.back(), AArch64::LR, AArch64::SP);
+ storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
Insts.emplace_back();
Insts.back().setOpcode(AArch64::BLR);
- Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
+ Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
+
Insts.emplace_back();
- loadReg(Insts.back(), AArch64::LR, AArch64::SP);
+ loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
Insts.emplace_back();
- createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
+ createDirectBranch(Insts.back(), IndCallHandler, Ctx);
+
return Insts;
}
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index c0b8fc3d807c9..68cb36dd3fae4 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -1694,6 +1694,9 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
{
#if defined(__aarch64__)
+ // the target address is placed on stack
+ // the identifier of the indirect call site is placed in X1 register
+
// clang-format off
__asm__ __volatile__(SAVE_ALL
"ldp x0, x1, [sp, #288]\n"
@@ -1731,6 +1734,9 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
{
#if defined(__aarch64__)
+ // the target address is placed on stack
+ // the identifier of the indirect call site is placed in X1 register
+
// clang-format off
__asm__ __volatile__(SAVE_ALL
"ldp x0, x1, [sp, #288]\n"
diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h
index b1d04f9d558e0..9cb8e022f58df 100644
--- a/bolt/runtime/sys_aarch64.h
+++ b/bolt/runtime/sys_aarch64.h
@@ -18,10 +18,12 @@
"stp x24, x25, [sp, #-16]!\n" \
"stp x26, x27, [sp, #-16]!\n" \
"stp x28, x29, [sp, #-16]!\n" \
- "str x30, [sp,#-16]!\n"
+ "mrs x29, nzcv\n" \
+ "stp x29, x30, [sp, #-16]!\n"
// Mirrors SAVE_ALL
#define RESTORE_ALL \
- "ldr x30, [sp], #16\n" \
+ "ldp x29, x30, [sp], #16\n" \
+ "msr nzcv, x29\n" \
"ldp x28, x29, [sp], #16\n" \
"ldp x26, x27, [sp], #16\n" \
"ldp x24, x25, [sp], #16\n" \
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
index f9056da333b4e..697fc560ddb73 100644
--- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -15,9 +15,64 @@ int main() {
REQUIRES: system-linux,bolt-runtime
RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie
+RUN: llvm-objdump --disassemble-symbols=main %t.exe \
+RUN: | FileCheck %s --check-prefix=CHECKINDIRECTREG
+
+CHECKINDIRECTREG: mov w0, #0xa
+CHECKINDIRECTREG-NEXT: mov w1, #0x14
+CHECKINDIRECTREG-NEXT: blr x8
RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
-RUN: -o %t.instrumented
+RUN: -o %t.instrumented \
+RUN: | FileCheck %s --check-prefix=CHECK-INSTR-LOG
+
+CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1
+
+RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \
+RUN: | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG
+
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \
+RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \
+RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
+
+CHECK-INSTR-INDIRECTREG: mov w0, #0xa
+CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
+// store current values
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+// store the indirect target address in x0
+CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
+// load callsite id into x1
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: str x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
+// call instrumentation library handler function
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+// restore registers saved before
+CHECK-INSTR-INDIRECTREG-NEXT: ldr x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: ldr x8
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
+// original indirect call instruction
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+
+
+CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>:
+CHECK-INSTR-INDIR-CALL-NEXT: ret
+
+CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>:
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler
# Instrumented program needs to finish returning zero
RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT
>From 9c90d543780d71548bcf08bedd3e7e1591f58bfd Mon Sep 17 00:00:00 2001
From: yavtuk <yavtuk at ya.ru>
Date: Wed, 11 Feb 2026 16:45:36 +0300
Subject: [PATCH 2/2] [bolt][aarch64] Reduce indirect call instrumentation
snippet
---
bolt/include/bolt/Core/MCPlusBuilder.h | 2 +-
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 92 +++++++++----------
bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 2 +-
bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 2 +-
bolt/runtime/instr.cpp | 4 +-
.../AArch64/instrumentation-ind-call.c | 19 ++--
6 files changed, 56 insertions(+), 65 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 595e6d66da90e..4b79cc8bf84c1 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -2383,7 +2383,7 @@ class MCPlusBuilder {
virtual InstructionListType
createInstrumentedIndirectCall(MCInst &&CallInst, MCSymbol *HandlerFuncAddr,
- int CallSiteID, MCContext *Ctx) {
+ size_t CallSiteID, MCContext *Ctx) {
llvm_unreachable("not implemented");
return InstructionListType();
}
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index a6bbd9e875e30..054291e627e4a 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2603,14 +2603,28 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
InstructionListType createLoadImmediate(const MCPhysReg Dest,
uint64_t Imm) const override {
- InstructionListType Insts(4);
- int Shift = 48;
- for (int I = 0; I < 4; I++, Shift -= 16) {
- Insts[I].setOpcode(AArch64::MOVKXi);
- Insts[I].addOperand(MCOperand::createReg(Dest));
- Insts[I].addOperand(MCOperand::createReg(Dest));
- Insts[I].addOperand(MCOperand::createImm((Imm >> Shift) & 0xFFFF));
- Insts[I].addOperand(MCOperand::createImm(Shift));
+ InstructionListType Insts;
+
+ Insts.emplace_back();
+ MCInst &Inst = Insts.back();
+ Inst.clear();
+ Inst.setOpcode(AArch64::MOVZXi);
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
+ Inst.addOperand(MCOperand::createImm(0));
+
+ int Shift = 16;
+ for (int I = 0; I < 3; I++, Shift += 16) {
+ const uint64_t ImmVal = (Imm >> Shift) & 0xFFFF;
+ if (!ImmVal)
+ continue;
+ Insts.emplace_back();
+ MCInst &Inst = Insts.back();
+ Inst.setOpcode(AArch64::MOVKXi);
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createReg(Dest));
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ Inst.addOperand(MCOperand::createImm(Shift));
}
return Insts;
}
@@ -2624,62 +2638,46 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
MCSymbol *HandlerFuncAddr,
- int CallSiteID,
+ size_t CallSiteID,
MCContext *Ctx) override {
// Code sequence used to enter indirect call instrumentation helper:
- // stp x0, x1, [sp, #-16]! createPushRegisters (1)
- // mov target, x0 convertIndirectCallToLoad -> orr x0 target xzr
- // mov x1 CallSiteID createLoadImmediate ->
- // movk x1, #0x0, lsl #48
- // movk x1, #0x0, lsl #32
- // movk x1, #0x0, lsl #16
- // movk x1, #0x0
- // stp x0, x1, [sp, #-16]! (2)
- // str x30, [sp, #-16]! (3)
- // adr x0 *HandlerFuncAddr -> adrp + add
- // blr x0 (__bolt_instr_ind_call_handler_func)
- // ldr x30, [sp], #16 (3)
- // ldr target ; restore target value (2)
- // ldp x0, x1, [sp], #16 (1)
- // blr target
+ // stp x0, x30, [sp, #-16]! createPushRegisters (1)
+ // movz/k x0, CallSiteID (createLoadImmediate)
+ // stp CallInst.Reg, x0, [sp, #-16]! ; store target value (2)
+ // adr CallInst.Reg, *HandlerFuncAddr (adrp + add)
+ // blr CallInst.Reg (__bolt_instr_ind_call_handler_func)
+ // ldr CallInst.Reg, [sp], #16 ; restore target value (2)
+ // ldp x0, x30, [sp], #16 (1)
+ // CallInst
+ // Note: if CallInst uses X0 need to use X1
+
+ const MCRegister BlrReg = CallInst.getOperand(0).getReg();
+ const MCRegister AvailReg = BlrReg == AArch64::X0 ? AArch64::X1 : BlrReg;
InstructionListType Insts;
Insts.emplace_back();
- createPushRegisters(Insts.back(), getIntArgRegister(0),
- getIntArgRegister(1));
- Insts.emplace_back(CallInst);
- convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
+ createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
+
InstructionListType LoadImm =
- createLoadImmediate(getIntArgRegister(1), CallSiteID);
+ createLoadImmediate(getIntArgRegister(0), CallSiteID);
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
+
Insts.emplace_back();
- createPushRegisters(Insts.back(), getIntArgRegister(0),
- getIntArgRegister(1));
- Insts.emplace_back();
- storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+ createPushRegisters(Insts.back(), AvailReg, getIntArgRegister(0));
+
Insts.resize(Insts.size() + 2);
- InstructionListType Addr = materializeAddress(
- HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
+ InstructionListType Addr = materializeAddress(HandlerFuncAddr, Ctx, AvailReg);
assert(Addr.size() == 2 && "Invalid Addr size");
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
Insts.emplace_back();
- createIndirectCallInst(Insts.back(), false,
- CallInst.getOperand(0).getReg());
-
- Insts.emplace_back();
- loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
-
- Insts.emplace_back();
- createPopRegisters(Insts.back(), getIntArgRegister(0),
- getIntArgRegister(1));
+ createIndirectCallInst(Insts.back(), false, AvailReg);
Insts.emplace_back();
- loadReg(Insts.back(), CallInst.getOperand(0).getReg(), AArch64::SP);
+ loadReg(Insts.back(), AvailReg, getStackPointer());
Insts.emplace_back();
- createPopRegisters(Insts.back(), getIntArgRegister(0),
- getIntArgRegister(1));
+ createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
Insts.emplace_back(CallInst);
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index d21bb628dcfcb..957768e5aaa29 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -821,7 +821,7 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
MCSymbol *HandlerFuncAddr,
- int CallSiteID,
+ size_t CallSiteID,
MCContext *Ctx) override {
// Code sequence used to enter indirect call instrumentation helper:
// addi sp, sp, -0x10
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 7c24c2ce136fa..51e7d27f18a0b 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -3123,7 +3123,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
MCSymbol *HandlerFuncAddr,
- int CallSiteID,
+ size_t CallSiteID,
MCContext *Ctx) override {
// Check if the target address expression used in the original indirect call
// uses the stack pointer, which we are going to clobber.
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index 68cb36dd3fae4..dbca4c3a7a46d 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -1699,7 +1699,7 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
// clang-format off
__asm__ __volatile__(SAVE_ALL
- "ldp x0, x1, [sp, #288]\n"
+ "ldp x0, x1, [sp, #272]\n"
"bl instrumentIndirectCall\n"
RESTORE_ALL
"ret\n"
@@ -1739,7 +1739,7 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
// clang-format off
__asm__ __volatile__(SAVE_ALL
- "ldp x0, x1, [sp, #288]\n"
+ "ldp x0, x1, [sp, #272]\n"
"bl instrumentIndirectCall\n"
RESTORE_ALL
"ret\n"
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
index 697fc560ddb73..c3ac79f483c27 100644
--- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -39,24 +39,17 @@ RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
CHECK-INSTR-INDIRECTREG: mov w0, #0xa
CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
// store current values
-CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
-// store the indirect target address in x0
-CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
-// load callsite id into x1
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: str x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x30, {{.*}}
+// load callsite id
+CHECK-INSTR-INDIRECTREG-NEXT: mov x0, #0x0
+CHECK-INSTR-INDIRECTREG-NEXT: stp x8, x0, {{.*}}
CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
// call instrumentation library handler function
CHECK-INSTR-INDIRECTREG-NEXT: blr x8
// restore registers saved before
-CHECK-INSTR-INDIRECTREG-NEXT: ldr x30, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: ldr x8
-CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: ldr x8, [sp], #0x10
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x30, [sp], #0x10
// original indirect call instruction
CHECK-INSTR-INDIRECTREG-NEXT: blr x8
More information about the llvm-commits
mailing list