[llvm] Revert "[BOLT][AArch64] Fixed indirect call instrumentation snippet" (PR #170874)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 5 07:49:57 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Gergely Bálint (bgergely0)

<details>
<summary>Changes</summary>

Reverts llvm/llvm-project#<!-- -->141918

The patch broke a buildbot while applying instrumentation:
https://lab.llvm.org/buildbot/#/builders/128/builds/8866

Reverting until the issue is triaged.



---
Full diff: https://github.com/llvm/llvm-project/pull/170874.diff


6 Files Affected:

- (modified) bolt/include/bolt/Core/MCPlusBuilder.h (-5) 
- (modified) bolt/lib/Passes/Instrumentation.cpp (+3-6) 
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+55-86) 
- (modified) bolt/runtime/instr.cpp (+2-8) 
- (modified) bolt/runtime/sys_aarch64.h (+2-4) 
- (modified) bolt/test/runtime/AArch64/instrumentation-ind-call.c (+1-55) 


``````````diff
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index c8f4e2aa8c580..a318ef0b6bd68 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -538,11 +538,6 @@ class MCPlusBuilder {
     llvm_unreachable("not implemented");
   }
 
-  virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
-                                  MCContext *Ctx) {
-    llvm_unreachable("not implemented");
-  }
-
   virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
 
   virtual unsigned getShortBranchOpcode(unsigned Opcode) const {
diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp
index 10479f35d8f9d..150461b020f06 100644
--- a/bolt/lib/Passes/Instrumentation.cpp
+++ b/bolt/lib/Passes/Instrumentation.cpp
@@ -305,12 +305,9 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
                  : IndCallHandlerExitBBFunction->getSymbol(),
       IndCallSiteID, &*BC.Ctx);
 
-  if (!BC.isAArch64()) {
-    Iter = BB.eraseInstruction(Iter);
-    Iter = insertInstructions(CounterInstrs, BB, Iter);
-    --Iter;
-  } else
-    Iter = insertInstructions(CounterInstrs, BB, Iter);
+  Iter = BB.eraseInstruction(Iter);
+  Iter = insertInstructions(CounterInstrs, BB, Iter);
+  --Iter;
 }
 
 bool Instrumentation::instrumentOneTarget(
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index dc7644fbabdcf..af87d5c12b5ce 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -48,14 +48,14 @@ static cl::opt<bool> NoLSEAtomics(
 
 namespace {
 
-[[maybe_unused]] static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
   Inst.setOpcode(AArch64::MRS);
   Inst.clear();
   Inst.addOperand(MCOperand::createReg(RegName));
   Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
 }
 
-[[maybe_unused]] static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
+static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
   Inst.setOpcode(AArch64::MSR);
   Inst.clear();
   Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
@@ -2114,14 +2114,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       convertJmpToTailCall(Inst);
   }
 
-  void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
-                          MCContext *Ctx) override {
-    Inst.setOpcode(AArch64::B);
-    Inst.clear();
-    Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
-        Inst, MCSymbolRefExpr::create(Target, *Ctx), *Ctx, 0)));
-  }
-
   bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
                      const MCSymbol *&TBB, const MCSymbol *&FBB,
                      MCInst *&CondBranch,
@@ -2479,14 +2471,21 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   }
 
   InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
+    InstructionListType Insts(5);
     // Code sequence for instrumented indirect call handler:
-    //   ret
-
-    InstructionListType Insts;
-
-    Insts.emplace_back();
-    createReturn(Insts.back());
-
+    //   msr  nzcv, x1
+    //   ldp  x0, x1, [sp], #16
+    //   ldr  x16, [sp], #16
+    //   ldp  x0, x1, [sp], #16
+    //   br   x16
+    setSystemFlag(Insts[0], AArch64::X1);
+    createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
+    // Here we load address of the next function which should be called in the
+    // original binary to X16 register. Writing to X16 is permitted without
+    // needing to restore.
+    loadReg(Insts[2], AArch64::X16, AArch64::SP);
+    createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
+    createIndirectBranch(Insts[4], AArch64::X16, 0);
     return Insts;
   }
 
@@ -2562,59 +2561,39 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
                                                      MCSymbol *HandlerFuncAddr,
                                                      int CallSiteID,
                                                      MCContext *Ctx) override {
+    InstructionListType Insts;
     // Code sequence used to enter indirect call instrumentation helper:
-    //   stp x0, x1, [sp, #-16]! createPushRegisters  (1)
-    //   mov target, x0  convertIndirectCallToLoad -> orr x0 target xzr
+    //   stp x0, x1, [sp, #-16]! createPushRegisters
+    //   mov target x0  convertIndirectCallToLoad -> orr x0 target xzr
     //   mov x1 CallSiteID createLoadImmediate ->
     //   movk    x1, #0x0, lsl #48
     //   movk    x1, #0x0, lsl #32
     //   movk    x1, #0x0, lsl #16
     //   movk    x1, #0x0
-    //   stp x0, x30, [sp, #-16]!    (2)
+    //   stp x0, x1, [sp, #-16]!
+    //   bl *HandlerFuncAddr createIndirectCall ->
     //   adr x0 *HandlerFuncAddr -> adrp + add
-    //   blr x0   (__bolt_instr_ind_call_handler_func)
-    //   ldp x0, x30, [sp], #16   (2)
-    //   mov x0, target ; move target address to used register
-    //   ldp x0, x1, [sp], #16   (1)
-
-    InstructionListType Insts;
+    //   blr x0
     Insts.emplace_back();
-    createPushRegisters(Insts.back(), getIntArgRegister(0),
-                        getIntArgRegister(1));
+    createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
     Insts.emplace_back(CallInst);
-    convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
+    convertIndirectCallToLoad(Insts.back(), AArch64::X0);
     InstructionListType LoadImm =
         createLoadImmediate(getIntArgRegister(1), CallSiteID);
     Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
     Insts.emplace_back();
-    createPushRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
+    createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
     Insts.resize(Insts.size() + 2);
-    InstructionListType Addr = materializeAddress(
-        HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
+    InstructionListType Addr =
+        materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
     assert(Addr.size() == 2 && "Invalid Addr size");
     std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
-
     Insts.emplace_back();
-    createIndirectCallInst(Insts.back(), false,
-                           CallInst.getOperand(0).getReg());
+    createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
 
-    Insts.emplace_back();
-    createPopRegisters(Insts.back(), getIntArgRegister(0), AArch64::LR);
-
-    // move x0 to indirect call register
-    Insts.emplace_back();
-    Insts.back().setOpcode(AArch64::ORRXrs);
-    Insts.back().insert(Insts.back().begin(),
-                        MCOperand::createReg(CallInst.getOperand(0).getReg()));
-    Insts.back().insert(Insts.back().begin() + 1,
-                        MCOperand::createReg(AArch64::XZR));
-    Insts.back().insert(Insts.back().begin() + 2,
-                        MCOperand::createReg(getIntArgRegister(0)));
-    Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
-
-    Insts.emplace_back();
-    createPopRegisters(Insts.back(), getIntArgRegister(0),
-                       getIntArgRegister(1));
+    // Carry over metadata including tail call marker if present.
+    stripAnnotations(Insts.back());
+    moveAnnotations(std::move(CallInst), Insts.back());
 
     return Insts;
   }
@@ -2623,10 +2602,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
                                           const MCSymbol *IndCallHandler,
                                           MCContext *Ctx) override {
-    // Code sequence used to check whether InstrTrampoline was initialized
+    // Code sequence used to check whether InstrTampoline was initialized
     // and call it if so, returns via IndCallHandler
-    //   adrp    x0, InstrTrampoline
-    //   ldr     x0, [x0, #lo12:InstrTrampoline]
+    //   stp     x0, x1, [sp, #-16]!
+    //   mrs     x1, nzcv
+    //   adr     x0, InstrTrampoline -> adrp + add
+    //   ldr     x0, [x0]
     //   subs    x0, x0, #0x0
     //   b.eq    IndCallHandler
     //   str     x30, [sp, #-16]!
@@ -2634,42 +2615,30 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     //   ldr     x30, [sp], #16
     //   b       IndCallHandler
     InstructionListType Insts;
-
-    // load handler address
-    MCInst InstAdrp;
-    InstAdrp.setOpcode(AArch64::ADRP);
-    InstAdrp.addOperand(MCOperand::createReg(getIntArgRegister(0)));
-    InstAdrp.addOperand(MCOperand::createImm(0));
-    setOperandToSymbolRef(InstAdrp, /* OpNum */ 1, InstrTrampoline,
-                          /* Addend */ 0, Ctx, ELF::R_AARCH64_ADR_GOT_PAGE);
-    Insts.emplace_back(InstAdrp);
-
-    MCInst InstLoad;
-    InstLoad.setOpcode(AArch64::LDRXui);
-    InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
-    InstLoad.addOperand(MCOperand::createReg(getIntArgRegister(0)));
-    InstLoad.addOperand(MCOperand::createImm(0));
-    setOperandToSymbolRef(InstLoad, /* OpNum */ 2, InstrTrampoline,
-                          /* Addend */ 0, Ctx, ELF::R_AARCH64_LD64_GOT_LO12_NC);
-    Insts.emplace_back(InstLoad);
-
-    InstructionListType CmpJmp =
-        createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
-    Insts.insert(Insts.end(), CmpJmp.begin(), CmpJmp.end());
-
     Insts.emplace_back();
-    storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
-
+    createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
+    Insts.emplace_back();
+    getSystemFlag(Insts.back(), getIntArgRegister(1));
+    Insts.emplace_back();
+    Insts.emplace_back();
+    InstructionListType Addr =
+        materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
+    std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
+    assert(Addr.size() == 2 && "Invalid Addr size");
+    Insts.emplace_back();
+    loadReg(Insts.back(), AArch64::X0, AArch64::X0);
+    InstructionListType cmpJmp =
+        createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
+    Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
+    Insts.emplace_back();
+    storeReg(Insts.back(), AArch64::LR, AArch64::SP);
     Insts.emplace_back();
     Insts.back().setOpcode(AArch64::BLR);
-    Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
-
+    Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
     Insts.emplace_back();
-    loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
-
+    loadReg(Insts.back(), AArch64::LR, AArch64::SP);
     Insts.emplace_back();
-    createDirectBranch(Insts.back(), IndCallHandler, Ctx);
-
+    createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
     return Insts;
   }
 
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
index 634ade6bdd407..f586db2b0f9ba 100644
--- a/bolt/runtime/instr.cpp
+++ b/bolt/runtime/instr.cpp
@@ -1691,12 +1691,9 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
 extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
 {
 #if defined(__aarch64__)
-  // the target address is placed on stack
-  // the identifier of the indirect call site is placed in X1 register
-
   // clang-format off
   __asm__ __volatile__(SAVE_ALL
-                       "ldr x0, [sp, #272]\n"
+                       "ldp x0, x1, [sp, #288]\n"
                        "bl instrumentIndirectCall\n"
                        RESTORE_ALL
                        "ret\n"
@@ -1731,12 +1728,9 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
 extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
 {
 #if defined(__aarch64__)
-  // the target address is placed on stack
-  // the identifier of the indirect call site is placed in X1 register
-
   // clang-format off
   __asm__ __volatile__(SAVE_ALL
-                       "ldr x0, [sp, #272]\n"
+                       "ldp x0, x1, [sp, #288]\n"
                        "bl instrumentIndirectCall\n"
                        RESTORE_ALL
                        "ret\n"
diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h
index 9cb8e022f58df..b1d04f9d558e0 100644
--- a/bolt/runtime/sys_aarch64.h
+++ b/bolt/runtime/sys_aarch64.h
@@ -18,12 +18,10 @@
   "stp x24, x25, [sp, #-16]!\n"                                                \
   "stp x26, x27, [sp, #-16]!\n"                                                \
   "stp x28, x29, [sp, #-16]!\n"                                                \
-  "mrs x29, nzcv\n"                                                            \
-  "stp x29, x30, [sp, #-16]!\n"
+  "str x30, [sp,#-16]!\n"
 // Mirrors SAVE_ALL
 #define RESTORE_ALL                                                            \
-  "ldp x29, x30, [sp], #16\n"                                                  \
-  "msr nzcv, x29\n"                                                            \
+  "ldr x30, [sp], #16\n"                                                       \
   "ldp x28, x29, [sp], #16\n"                                                  \
   "ldp x26, x27, [sp], #16\n"                                                  \
   "ldp x24, x25, [sp], #16\n"                                                  \
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
index eddecba4d8b52..f9056da333b4e 100644
--- a/bolt/test/runtime/AArch64/instrumentation-ind-call.c
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
@@ -15,63 +15,9 @@ int main() {
 REQUIRES: system-linux,bolt-runtime
 
 RUN: %clang %cflags %s -o %t.exe -Wl,-q -no-pie -fpie
-RUN: llvm-objdump --disassemble-symbols=main %t.exe \
-RUN:   | FileCheck %s --check-prefix=CHECKINDIRECTREG
-
-CHECKINDIRECTREG: mov w0, #0xa
-CHECKINDIRECTREG-NEXT: mov w1, #0x14
-CHECKINDIRECTREG-NEXT: blr x8
 
 RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
-RUN:   -o %t.instrumented \
-RUN:   | FileCheck %s --check-prefix=CHECK-INSTR-LOG
-
-CHECK-INSTR-LOG: BOLT-INSTRUMENTER: Number of indirect call site descriptors: 1
-
-RUN: llvm-objdump --disassemble-symbols=main %t.instrumented \
-RUN:   | FileCheck %s --check-prefix=CHECK-INSTR-INDIRECTREG
-
-RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler \
-RUN:   %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL
-RUN: llvm-objdump --disassemble-symbols=__bolt_instr_ind_call_handler_func \
-RUN:   %t.instrumented | FileCheck %s --check-prefix=CHECK-INSTR-INDIR-CALL-FUNC
-
-CHECK-INSTR-INDIRECTREG: mov w0, #0xa
-CHECK-INSTR-INDIRECTREG-NEXT: mov w1, #0x14
-// store current values
-CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x1, {{.*}}
-// store the indirect target address in x0
-CHECK-INSTR-INDIRECTREG-NEXT: mov x0, x8
-// load callsite id into x1
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: movk x1, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: stp x0, x30, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: adrp x8, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: add x8, {{.*}}
-// call instrumentation library handler function
-CHECK-INSTR-INDIRECTREG-NEXT: blr x8
-// restore registers saved before
-CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x30, {{.*}}
-CHECK-INSTR-INDIRECTREG-NEXT: mov x8, x0
-CHECK-INSTR-INDIRECTREG-NEXT: ldp x0, x1, {{.*}}
-// original indirect call instruction
-CHECK-INSTR-INDIRECTREG-NEXT: blr x8
-
-
-CHECK-INSTR-INDIR-CALL: __bolt_instr_ind_call_handler>:
-CHECK-INSTR-INDIR-CALL-NEXT: ret
-
-CHECK-INSTR-INDIR-CALL-FUNC: __bolt_instr_ind_call_handler_func>:
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: adrp x0
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x0
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: cmp x0, #0x0
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b.eq{{.*}}__bolt_instr_ind_call_handler
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: str x30
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: blr x0
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: ldr x30
-CHECK-INSTR-INDIR-CALL-FUNC-NEXT: b{{.*}}__bolt_instr_ind_call_handler
+RUN:   -o %t.instrumented
 
 # Instrumented program needs to finish returning zero
 RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT

``````````

</details>


https://github.com/llvm/llvm-project/pull/170874


More information about the llvm-commits mailing list