[llvm] WIP: [bolt][aarch64] Change indirect call instrumentation snippet (PR #180229)
Alexey Moksyakov via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 05:24:12 PST 2026
https://github.com/yavtuk updated https://github.com/llvm/llvm-project/pull/180229
>From a954a633b83d9c880f5c4b2f8dcd2579c403b627 Mon Sep 17 00:00:00 2001
From: Alexey Moksyakov <moksyakov.alexey at huawei.com>
Date: Thu, 27 Nov 2025 23:48:10 +0300
Subject: [PATCH] [bolt][aarch64] Change indirect call instrumentation snippet
The indirect call instrumentation snippet uses the x16 register in its exit
handler to branch to the destination target, overwriting the original value of x16:
__bolt_instr_ind_call_handler_func:
msr nzcv, x1
ldp x0, x1, [sp], #16
ldr x16, [sp], #16
ldp x0, x1, [sp], #16
br x16 <-----
This patch changes the instrumentation snippet so that it calls the
instrumentation runtime library through an indirect call instruction, and adds
a wrapper that saves and restores the target value and the register used by
the original indirect call instruction.
Example:
mov x16, foo
indirectCall:
adrp x8, Label
add x8, x8, #:lo12:Label
blr x8
Before:
Instrumented indirect call:
stp x0, x1, [sp, #-16]!
mov x0, x8
movk x1, #0x0, lsl #48
movk x1, #0x0, lsl #32
movk x1, #0x0, lsl #16
movk x1, #0x0
stp x0, x1, [sp, #-16]!
adrp x0, __bolt_instr_ind_call_handler_func
add x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
blr x0
__bolt_instr_ind_call_handler: (exit snippet)
msr nzcv, x1
ldp x0, x1, [sp], #16
ldr x16, [sp], #16
ldp x0, x1, [sp], #16
br x16 <- overwrites the original value in X16
__bolt_instr_ind_call_handler_func: (entry snippet)
stp x0, x1, [sp, #-16]!
mrs x1, nzcv
adrp x0, __bolt_instr_ind_call_handler
add x0, x0, #:lo12:__bolt_instr_ind_call_handler
ldr x0, [x0]
cmp x0, #0x0
b.eq __bolt_instr_ind_call_handler
str x30, [sp, #-16]!
blr x0 <--- runtime lib store/load all regs
ldr x30, [sp], #16
b __bolt_instr_ind_call_handler
_________________________________________________________________________
After:
mov x16, foo
indirectCall:
adrp x8, Label
add x8, x8, #:lo12:Label
blr x8
Instrumented indirect call:
stp x0, x1, [sp, #-16]!
mov x0, x8
movk x1, #0x0, lsl #48
movk x1, #0x0, lsl #32
movk x1, #0x0, lsl #16
movk x1, #0x0
stp x0, x1, [sp, #-16]!
str x30, [sp, #-16]!
adrp x8, __bolt_instr_ind_call_handler_func
add x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
blr x8 <--- call trampoline instr lib
ldr x30, [sp], #16
ldr x8, [sp], #16
ldp x0, x1, [sp], #16
blr x8 <--- original indirect call instruction
// don't touch regs besides x0, x1
__bolt_instr_ind_call_handler: (exit snippet)
ret <---- return to original function with indirect call
__bolt_instr_ind_call_handler_func: (entry snippet)
adrp x0, __bolt_instr_ind_call_handler
add x0, x0, #:lo12:__bolt_instr_ind_call_handler
ldr x0, [x0]
cmp x0, #0x0
b.eq __bolt_instr_ind_call_handler
str x30, [sp, #-16]!
blr x0 <--- runtime lib store/load all regs
ldr x30, [sp], #16
b __bolt_instr_ind_call_handler
---
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 144 +++++++++++-------
bolt/runtime/instr.cpp | 6 +
bolt/runtime/sys_aarch64.h | 6 +-
.../AArch64/instrumentation-ind-call.c | 57 ++++++-
5 files changed, 160 insertions(+), 58 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index e571e91d85135..595e6d66da90e 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -545,6 +545,11 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) {
+ llvm_unreachable("not implemented");
+ }
+
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index aa5cf3c671cdc..a6bbd9e875e30 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
namespace {
-static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
Inst.setOpcode(AArch64::MRS);
Inst.clear();
Inst.addOperand(MCOperand::createReg(RegName));
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
}
-static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
Inst.setOpcode(AArch64::MSR);
Inst.clear();
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2178,6 +2178,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
convertJmpToTailCall(Inst);
}
+ void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ Inst.setOpcode(AArch64::B);
+ Inst.clear();
+ Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
+ Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
+ }
+
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch,
@@ -2535,21 +2543,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
- InstructionListType Insts(5);
// Code sequence for instrumented indirect call handler:
- // msr nzcv, x1
- // ldp x0, x1, [sp], #16
- // ldr x16, [sp], #16
- // ldp x0, x1, [sp], #16
- // br x16
- setSystemFlag(Insts[0], AArch64::X1);
- createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
- // Here we load address of the next function which should be called in the
- // original binary to X16 register. Writing to X16 is permitted without
- // needing to restore.
- loadReg(Insts[2], AArch64::X16, AArch64::SP);
- createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
- createIndirectBranch(Insts[4], AArch64::X16, 0);
+ // ret
+
+ InstructionListType Insts;
+
+ Insts.emplace_back();
+ createReturn(Insts.back());
+
return Insts;
}
@@ -2625,39 +2626,62 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
MCSymbol *HandlerFuncAddr,
int CallSiteID,
MCContext *Ctx) override {
- InstructionListType Insts;
// Code sequence used to enter indirect call instrumentation helper:
- // stp x0, x1, [sp, #-16]! createPushRegisters
- // mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
+ // stp x0, x1, [sp, #-16]! createPushRegisters (1)
+ // mov target, x0 convertIndirectCallToLoad -> orr x0 target xzr
// mov x1 CallSiteID createLoadImmediate ->
// movk x1, #0x0, lsl #48
// movk x1, #0x0, lsl #32
// movk x1, #0x0, lsl #16
// movk x1, #0x0
- // stp x0, x1, [sp, #-16]!
- // bl *HandlerFuncAddr createIndirectCall ->
+ // stp x0, x1, [sp, #-16]! (2)
+ // str x30, [sp, #-16]! (3)
// adr x0 *HandlerFuncAddr -> adrp + add
- // blr x0
+ // blr x0 (__bolt_instr_ind_call_handler_func)
+ // ldr x30, [sp], #16 (3)
+ // ldr target ; restore target value (2)
+ // ldp x0, x1, [sp], #16 (1)
+ // blr target
+
+ InstructionListType Insts;
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+ createPushRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
Insts.emplace_back(CallInst);
- convertIndirectCallToLoad(Insts.back(), AArch64::X0);
+ convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
InstructionListType LoadImm =
createLoadImmediate(getIntArgRegister(1), CallSiteID);
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+ createPushRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
+ Insts.emplace_back();
+ storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
Insts.resize(Insts.size() + 2);
- InstructionListType Addr =
- materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
+ InstructionListType Addr = materializeAddress(
+ HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
assert(Addr.size() == 2 && "Invalid Addr size");
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
+
+ Insts.emplace_back();
+ createIndirectCallInst(Insts.back(), false,
+ CallInst.getOperand(0).getReg());
+
Insts.emplace_back();
- createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
+ loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
- // Carry over metadata including tail call marker if present.
- stripAnnotations(Insts.back());
- moveAnnotations(std::move(CallInst), Insts.back());
+ Insts.emplace_back();
+ createPopRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
+
+ Insts.emplace_back();
+ loadReg(Insts.back(), CallInst.getOperand(0).getReg(), AArch64::SP);
+
+ Insts.emplace_back();
+ createPopRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
+
+ Insts.emplace_back(CallInst);
return Insts;
}
@@ -2666,12 +2690,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
const MCSymbol *IndCallHandler,
MCContext *Ctx) override {
- // Code sequence used to check whether InstrTampoline was initialized
+ // Code sequence used to check whether InstrTrampoline was initialized
// and call it if so, returns via IndCallHandler
- // stp x0, x1, [sp, #-16]!
- // mrs x1, nzcv
- // adr x0, InstrTrampoline -> adrp + add
- // ldr x0, [x0]
+ // adrp x0, InstrTrampoline
+ // ldr x0, [x0, #lo12:InstrTrampoline]
// subs x0, x0, #0x0
// b.eq IndCallHandler
// str x30, [sp, #-16]!
@@ -2679,30 +2701,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// ldr x30, [sp], #16
// b IndCallHandler
InstructionListType Insts;
+
+ // load handler address
+ MCInst InstAdrp;
+ InstAdrp.setOpcode(AArch64::ADRP);
+ InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstAdrp.addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
+ /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+ Insts.emplace_back(InstAdrp);
+
+ MCInst InstLoad;
+ InstLoad.setOpcode(AArch64::LDRXui);
+ InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstLoad.addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
+ /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+ Insts.emplace_back(InstLoad);
+
+ InstructionListType CmpJmp =
+ createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
+ Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
+
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
- Insts.emplace_back();
- getSystemFlag(Insts.back(), getIntArgRegister(1));
- Insts.emplace_back();
- Insts.emplace_back();
- InstructionListType Addr =
- materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
- std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
- assert(Addr.size() == 2 && "Invalid Addr size");
- Insts.emplace_back();
- loadReg(Insts.back(), AArch64::X0, AArch64::X0);
- InstructionListType cmpJmp =
- createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
- Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
- Insts.emplace_back();
- storeReg(Insts.back(), AArch64::LR, AArch64::SP);
+ storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
Insts.emplace_back();
Insts.back().setOpcode(AArch64::BLR);
- Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
+ Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
+
Insts.emplace_back();
- loadReg(Insts.back(), AArch64::LR, AArch64::SP);
+ loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
Insts.emplace_back();
- createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
+ createDirectBranch(Insts.back(), IndCallHandler, Ctx);
+
return Insts;
}
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index c0b8fc3d807c9..68cb36dd3fae4 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -1694,6 +1694,9 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
{
#if defined(__aarch64__)
+ // the target address is placed on stack
+ // the identifier of the indirect call site is placed in X1 register
+
// clang-format off
__asm__ __volatile__(SAVE_ALL
"ldp x0, x1, [sp, #288]\n"
@@ -1731,6 +1734,9 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
{
#if defined(__aarch64__)
+ // the target address is placed on stack
+ // the identifier of the indirect call site is placed in X1 register
+
// clang-format off
__asm__ __volatile__(SAVE_ALL
"ldp x0, x1, [sp, #288]\n"
diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h
index b1d04f9d558e0..9cb8e022f58df 100644
--- a/bolt/runtime/sys_aarch64.h
+++ b/bolt/runtime/sys_aarch64.h
@@ -18,10 +18,12 @@
"stp x24, x25, [sp, #-16]!\n" \
"stp x26, x27, [sp, #-16]!\n" \
"stp x28, x29, [sp, #-16]!\n" \
- "str x30, [sp,#-16]!\n"
+ "mrs x29, nzcv\n" \
+ "stp x29, x30, [sp, #-16]!\n"
// Mirrors SAVE_ALL
#define RESTORE_ALL \
- "ldr x30, [sp], #16\n" \
+ "ldp x29, x30, [sp], #16\n" \
+ "msr nzcv, x29\n" \
"ldp x28, x29, [sp], #16\n" \
"ldp x26, x27, [sp], #16\n" \
"ldp x24, x25, [sp], #16\n" \
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
index f9056da333b4e..697fc560ddb73 100644
--- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -15,9 +15,64 @@ int main() {
REQUIRES: system-linux,bolt-runtime
RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie
+RUN: llvm-objdump --disassemble-symbols=main %t.exe \
+RUN: | FileCheck %s --check-prefix=CHECKINDIRECTREG
+
+CHECKINDIRECTREG: mov w0, #0xa
+CHECKINDIRECTREG-NEXT: mov w1, #0x14
+CHECKINDIRECTREG-NEXT: blr x8
RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
-RUN: -o %t.instrumented
+RUN: -o %t.instrumented \
+RUN: | FileCheck %s --check-prefix=CHECK-INSTR-LOG
+
+CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1
+
+RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \
+RUN: | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG
+
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \
+RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \
+RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
+
+CHECK-INSTR-INDIRECTREG: mov w0, #0xa
+CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
+// store current values
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+// store the indirect target address in x0
+CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
+// load callsite id into x1
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: str x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
+// call instrumentation library handler function
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+// restore registers saved before
+CHECK-INSTR-INDIRECTREG-NEXT: ldr x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: ldr x8
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
+// original indirect call instruction
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+
+
+CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>:
+CHECK-INSTR-INDIR-CALL-NEXT: ret
+
+CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>:
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler
# Instrumented program needs to finish returning zero
RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT
More information about the llvm-commits
mailing list