[llvm] [BOLT][AArch64] Fixed indirect call instrumentation snippet (PR #141918)
Alexey Moksyakov via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 02:24:08 PST 2025
https://github.com/yavtuk updated https://github.com/llvm/llvm-project/pull/141918
>From 6796c375151bdddfcbf0adaa383e80a503e02232 Mon Sep 17 00:00:00 2001
From: Moksyakov Alexey <moksyakov.alexey at huawei.com>
Date: Thu, 29 May 2025 07:28:13 +0000
Subject: [PATCH] [bolt][aarch64] Change indirect call instrumentation snippet
Indirect call instrumentation snippet uses x16 register in exit
handler to go to destination target
__bolt_instr_ind_call_handler_func:
msr nzcv, x1
ldp x0, x1, [sp], #16
ldr x16, [sp], #16
ldp x0, x1, [sp], #16
br x16 <-----
This patch adds the instrumentation snippet by calling instrumentation
runtime library through indirect call instruction and adding the wrapper
to store/load target value and the register for original indirect instruction.
Example:
mov x16, foo
infirectCall:
adrp x8, Label
add x8, x8, #:lo12:Label
blr x8
Before:
Instrumented indirect call:
stp x0, x1, [sp, #-16]!
mov x0, x8
movk x1, #0x0, lsl #48
movk x1, #0x0, lsl #32
movk x1, #0x0, lsl #16
movk x1, #0x0
stp x0, x1, [sp, #-16]!
adrp x0, __bolt_instr_ind_call_handler_func
add x0, x0, #:lo12:__bolt_instr_ind_call_handler_func
blr x0
__bolt_instr_ind_call_handler: (exit snippet)
msr nzcv, x1
ldp x0, x1, [sp], #16
ldr x16, [sp], #16
ldp x0, x1, [sp], #16
br x16 <- overwrites the original value in X16
__bolt_instr_ind_call_handler_func: (entry snippet)
stp x0, x1, [sp, #-16]!
mrs x1, nzcv
adrp x0, __bolt_instr_ind_call_handler
add x0, x0, x0, #:lo12:__bolt_instr_ind_call_handler
ldr x0, [x0]
cmp x0, #0x0
b.eq __bolt_instr_ind_call_handler
str x30, [sp, #-16]!
blr x0 <--- runtime lib store/load all regs
ldr x30, [sp], #16
b __bolt_instr_ind_call_handler
_________________________________________________________________________
After:
mov x16, foo
infirectCall:
adrp x8, Label
add x8, x8, #:lo12:Label
blr x8
Instrumented indirect call:
stp x0, x1, [sp, #-16]!
mov x0, x8
movk x1, #0x0, lsl #48
movk x1, #0x0, lsl #32
movk x1, #0x0, lsl #16
movk x1, #0x0
stp x0, x30, [sp, #-16]!
adrp x8, __bolt_instr_ind_call_handler_func
add x8, x8, #:lo12:__bolt_instr_ind_call_handler_func
blr x8 <--- call trampoline instr lib
ldp x0, x30, [sp], #16
mov x8, x0 <---- restore original target
ldp x0, x1, [sp], #16
blr x8 <--- original indirect call instruction
// don't touch regs besides x0, x1
__bolt_instr_ind_call_handler: (exit snippet)
ret <---- return to original function with indirect call
__bolt_instr_ind_call_handler_func: (entry snippet)
adrp x0, __bolt_instr_ind_call_handler
add x0, x0, #:lo12:__bolt_instr_ind_call_handler
ldr x0, [x0]
cmp x0, #0x0
b.eq __bolt_instr_ind_call_handler
str x30, [sp, #-16]!
blr x0 <--- runtime lib store/load all regs
ldr x30, [sp], #16
b __bolt_instr_ind_call_handler
Signed-off-by: Moksyakov Alexey <moksyakov.alexey at huawei.com>
---
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +
bolt/lib/Passes/Instrumentation.cpp | 9 +-
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 141 +++++++++++-------
bolt/runtime/instr.cpp | 10 +-
bolt/runtime/sys_aarch64.h | 6 +-
.../AArch64/instrumentation-ind-call.c | 56 ++++++-
6 files changed, 164 insertions(+), 63 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f543d2e5d79a1..510c84cc27e88 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -538,6 +538,11 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}
+ virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) {
+ llvm_unreachable("not implemented");
+ }
+
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp
index 150461b020f06..10479f35d8f9d 100644
--- a/bolt/lib/Passes/Instrumentation.cpp
+++ b/bolt/lib/Passes/Instrumentation.cpp
@@ -305,9 +305,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
: IndCallHandlerExitBBFunction->getSymbol(),
IndCallSiteID, &*BC.Ctx);
- Iter = BB.eraseInstruction(Iter);
- Iter = insertInstructions(CounterInstrs, BB, Iter);
- --Iter;
+ if (!BC.isAArch64()) {
+ Iter = BB.eraseInstruction(Iter);
+ Iter = insertInstructions(CounterInstrs, BB, Iter);
+ --Iter;
+ } else
+ Iter = insertInstructions(CounterInstrs, BB, Iter);
}
bool Instrumentation::instrumentOneTarget(
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index d4fd4b78f382d..9b9d56841fd8d 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
namespace {
-static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
Inst.setOpcode(AArch64::MRS);
Inst.clear();
Inst.addOperand(MCOperand::createReg(RegName));
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
}
-static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
Inst.setOpcode(AArch64::MSR);
Inst.clear();
Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2114,6 +2114,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
convertJmpToTailCall(Inst);
}
+ void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ Inst.setOpcode(AArch64::B);
+ Inst.clear();
+ Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
+ Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
+ }
+
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
const MCSymbol *&TBB, const MCSymbol *&FBB,
MCInst *&CondBranch,
@@ -2471,21 +2479,14 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
}
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
- InstructionListType Insts(5);
// Code sequence for instrumented indirect call handler:
- // msr nzcv, x1
- // ldp x0, x1, [sp], #16
- // ldr x16, [sp], #16
- // ldp x0, x1, [sp], #16
- // br x16
- setSystemFlag(Insts[0], AArch64::X1);
- createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
- // Here we load address of the next function which should be called in the
- // original binary to X16 register. Writing to X16 is permitted without
- // needing to restore.
- loadReg(Insts[2], AArch64::X16, AArch64::SP);
- createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
- createIndirectBranch(Insts[4], AArch64::X16, 0);
+ // ret
+
+ InstructionListType Insts;
+
+ Insts.emplace_back();
+ createReturn(Insts.back());
+
return Insts;
}
@@ -2561,39 +2562,59 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
MCSymbol *HandlerFuncAddr,
int CallSiteID,
MCContext *Ctx) override {
- InstructionListType Insts;
// Code sequence used to enter indirect call instrumentation helper:
- // stp x0, x1, [sp, #-16]! createPushRegisters
- // mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
+ // stp x0, x1, [sp, #-16]! createPushRegisters (1)
+ // mov target, x0 convertIndirectCallToLoad -> orr x0 target xzr
// mov x1 CallSiteID createLoadImmediate ->
// movk x1, #0x0, lsl #48
// movk x1, #0x0, lsl #32
// movk x1, #0x0, lsl #16
// movk x1, #0x0
- // stp x0, x1, [sp, #-16]!
- // bl *HandlerFuncAddr createIndirectCall ->
+ // stp x0, x30, [sp, #-16]! (2)
// adr x0 *HandlerFuncAddr -> adrp + add
- // blr x0
+ // blr x0 (__bolt_instr_ind_call_handler_func)
+ // ldp x0, x30, [sp], #16 (2)
+ // mov x0, target ; move target address to used register
+ // ldp x0, x1, [sp], #16 (1)
+
+ InstructionListType Insts;
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+ createPushRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
Insts.emplace_back(CallInst);
- convertIndirectCallToLoad(Insts.back(), AArch64::X0);
+ convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
InstructionListType LoadImm =
createLoadImmediate(getIntArgRegister(1), CallSiteID);
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+ createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
Insts.resize(Insts.size() + 2);
- InstructionListType Addr =
- materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
+ InstructionListType Addr = materializeAddress(
+ HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
assert(Addr.size() == 2 && "Invalid Addr size");
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
+
Insts.emplace_back();
- createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
+ createIndirectCallInst(Insts.back(), false,
+ CallInst.getOperand(0).getReg());
- // Carry over metadata including tail call marker if present.
- stripAnnotations(Insts.back());
- moveAnnotations(std::move(CallInst), Insts.back());
+ Insts.emplace_back();
+ createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
+
+ // move x0 to indirect call register
+ Insts.emplace_back();
+ Insts.back().setOpcode(AArch64::ORRXrs);
+ Insts.back().insert(Insts.back().begin(),
+ MCOperand::createReg(CallInst.getOperand(0).getReg()));
+ Insts.back().insert(Insts.back().begin() + 1,
+ MCOperand::createReg(AArch64::XZR));
+ Insts.back().insert(Insts.back().begin() + 2,
+ MCOperand::createReg(getIntArgRegister(0)));
+ Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
+
+ Insts.emplace_back();
+ createPopRegisters(Insts.back(), getIntArgRegister(0),
+ getIntArgRegister(1));
return Insts;
}
@@ -2602,12 +2623,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
const MCSymbol *IndCallHandler,
MCContext *Ctx) override {
- // Code sequence used to check whether InstrTampoline was initialized
+ // Code sequence used to check whether InstrTrampoline was initialized
// and call it if so, returns via IndCallHandler
- // stp x0, x1, [sp, #-16]!
- // mrs x1, nzcv
- // adr x0, InstrTrampoline -> adrp + add
- // ldr x0, [x0]
+ // adrp x0, InstrTrampoline
+ // ldr x0, [x0, #lo12:InstrTrampoline]
// subs x0, x0, #0x0
// b.eq IndCallHandler
// str x30, [sp, #-16]!
@@ -2615,30 +2634,42 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
// ldr x30, [sp], #16
// b IndCallHandler
InstructionListType Insts;
+
+ // load handler address
+ MCInst InstAdrp;
+ InstAdrp.setOpcode(AArch64::ADRP);
+ InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstAdrp.addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
+ /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
+ Insts.emplace_back(InstAdrp);
+
+ MCInst InstLoad;
+ InstLoad.setOpcode(AArch64::LDRXui);
+ InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
+ InstLoad.addOperand(MCOperand::createImm(0));
+ setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
+ /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
+ Insts.emplace_back(InstLoad);
+
+ InstructionListType CmpJmp =
+ createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
+ Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
+
Insts.emplace_back();
- createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
- Insts.emplace_back();
- getSystemFlag(Insts.back(), getIntArgRegister(1));
- Insts.emplace_back();
- Insts.emplace_back();
- InstructionListType Addr =
- materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
- std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
- assert(Addr.size() == 2 && "Invalid Addr size");
- Insts.emplace_back();
- loadReg(Insts.back(), AArch64::X0, AArch64::X0);
- InstructionListType cmpJmp =
- createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
- Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
- Insts.emplace_back();
- storeReg(Insts.back(), AArch64::LR, AArch64::SP);
+ storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
Insts.emplace_back();
Insts.back().setOpcode(AArch64::BLR);
- Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
+ Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
+
Insts.emplace_back();
- loadReg(Insts.back(), AArch64::LR, AArch64::SP);
+ loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
+
Insts.emplace_back();
- createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
+ createDirectBranch(Insts.back(), IndCallHandler, Ctx);
+
return Insts;
}
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index f586db2b0f9ba..634ade6bdd407 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -1691,9 +1691,12 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
{
#if defined(__aarch64__)
+ // the target address is placed on stack
+ // the identifier of the indirect call site is placed in X1 register
+
// clang-format off
__asm__ __volatile__(SAVE_ALL
- "ldp x0, x1, [sp, #288]\n"
+ "ldr x0, [sp, #272]\n"
"bl instrumentIndirectCall\n"
RESTORE_ALL
"ret\n"
@@ -1728,9 +1731,12 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
{
#if defined(__aarch64__)
+ // the target address is placed on stack
+ // the identifier of the indirect call site is placed in X1 register
+
// clang-format off
__asm__ __volatile__(SAVE_ALL
- "ldp x0, x1, [sp, #288]\n"
+ "ldr x0, [sp, #272]\n"
"bl instrumentIndirectCall\n"
RESTORE_ALL
"ret\n"
diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h
index b1d04f9d558e0..9cb8e022f58df 100644
--- a/bolt/runtime/sys_aarch64.h
+++ b/bolt/runtime/sys_aarch64.h
@@ -18,10 +18,12 @@
"stp x24, x25, [sp, #-16]!\n" \
"stp x26, x27, [sp, #-16]!\n" \
"stp x28, x29, [sp, #-16]!\n" \
- "str x30, [sp,#-16]!\n"
+ "mrs x29, nzcv\n" \
+ "stp x29, x30, [sp, #-16]!\n"
// Mirrors SAVE_ALL
#define RESTORE_ALL \
- "ldr x30, [sp], #16\n" \
+ "ldp x29, x30, [sp], #16\n" \
+ "msr nzcv, x29\n" \
"ldp x28, x29, [sp], #16\n" \
"ldp x26, x27, [sp], #16\n" \
"ldp x24, x25, [sp], #16\n" \
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
index f9056da333b4e..eddecba4d8b52 100644
--- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -15,9 +15,63 @@ int main() {
REQUIRES: system-linux,bolt-runtime
RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie
+RUN: llvm-objdump --disassemble-symbols=main %t.exe \
+RUN: | FileCheck %s --check-prefix=CHECKINDIRECTREG
+
+CHECKINDIRECTREG: mov w0, #0xa
+CHECKINDIRECTREG-NEXT: mov w1, #0x14
+CHECKINDIRECTREG-NEXT: blr x8
RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
-RUN: -o %t.instrumented
+RUN: -o %t.instrumented \
+RUN: | FileCheck %s --check-prefix=CHECK-INSTR-LOG
+
+CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1
+
+RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \
+RUN: | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG
+
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \
+RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL
+RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \
+RUN: %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
+
+CHECK-INSTR-INDIRECTREG: mov w0, #0xa
+CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
+// store current values
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
+// store the indirect target address in x0
+CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
+// load callsite id into x1
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
+// call instrumentation library handler function
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+// restore registers saved before
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x30, {{.*}}
+CHECK-INSTR-INDIRECTREG-NEXT: mov x8, x0
+CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
+// original indirect call instruction
+CHECK-INSTR-INDIRECTREG-NEXT: blr x8
+
+
+CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>:
+CHECK-INSTR-INDIR-CALL-NEXT: ret
+
+CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>:
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30
+CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler
# Instrumented program needs to finish returning zero
RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT
More information about the llvm-commits
mailing list