[llvm] [BOLT][RISCV]Fix handling of GOT relocation pairs (PR #149658)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 06:27:16 PDT 2025
https://github.com/dinyy updated https://github.com/llvm/llvm-project/pull/149658
>From 4ed097d494f9117797fdef5338f68fd16bd84809 Mon Sep 17 00:00:00 2001
From: yjn <1076891326 at qq.com>
Date: Sat, 19 Jul 2025 23:38:47 +0800
Subject: [PATCH] [BOLT][RISCV]fix up GOT Relocation Handling
---
bolt/include/bolt/Core/MCPlusBuilder.h | 14 +++
bolt/lib/Core/BinaryFunction.cpp | 102 ++++++++++++++++---
bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 21 ++++
bolt/test/RISCV/reloc-got.s | 16 ++-
4 files changed, 136 insertions(+), 17 deletions(-)
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f902a8c43cd1d..108bf1528d886 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -839,6 +839,20 @@ class MCPlusBuilder {
return StringRef();
}
+ /// Returns the base register used by the instruction.
+ virtual unsigned getBaseReg(const MCInst &Inst) const{
+ llvm_unreachable("not implemented");
+ return 0;
+ }
+
+ /// Matches a pair of instructions that implement a GOT load:
+ /// an AUIPC (loading the high part of the address)
+ /// followed by a GOT-loading instruction (loading the low part of the address).
+ virtual bool matchGotAuipcPair(const MCInst &Inst) const{
+ llvm_unreachable("not implemented");
+ return false;
+ }
+
/// Interface and basic functionality of a MCInstMatcher. The idea is to make
/// it easy to match one or more MCInsts against a tree-like pattern and
/// extract the fragment operands. Example:
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..a4dab59a18c85 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1457,7 +1457,7 @@ Error BinaryFunction::disassemble() {
if (BC.isAArch64())
handleAArch64IndirectCall(Instruction, Offset);
}
- } else if (BC.isRISCV()) {
+ }else if (BC.isRISCV()) {
// Check if there's a relocation associated with this instruction.
for (auto Itr = Relocations.lower_bound(Offset),
ItrE = Relocations.lower_bound(Offset + Size);
@@ -1466,15 +1466,7 @@ Error BinaryFunction::disassemble() {
MCSymbol *Symbol = Relocation.Symbol;
if (Relocation::isInstructionReference(Relocation.Type)) {
- uint64_t RefOffset = Relocation.Value - getAddress();
- LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
-
- if (LI == InstructionLabels.end()) {
- Symbol = BC.Ctx->createNamedTempSymbol();
- InstructionLabels.emplace(RefOffset, Symbol);
- } else {
- Symbol = LI->second;
- }
+ continue;
}
uint64_t Addend = Relocation.Addend;
@@ -1484,8 +1476,7 @@ Error BinaryFunction::disassemble() {
if (Relocation::isGOT(Relocation.Type)) {
assert(Relocation::isPCRelative(Relocation.Type) &&
"GOT relocation must be PC-relative on RISC-V");
- Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
- Addend = Relocation.Value + Relocation.Offset + getAddress();
+ continue;
}
int64_t Value = Relocation.Value;
const bool Result = BC.MIB->replaceImmWithSymbolRef(
@@ -1493,7 +1484,7 @@ Error BinaryFunction::disassemble() {
(void)Result;
assert(Result && "cannot replace immediate with relocation");
}
- }
+ }
add_instruction:
if (getDWARFLineTable()) {
@@ -1514,6 +1505,90 @@ Error BinaryFunction::disassemble() {
addInstruction(Offset, std::move(Instruction));
}
+ if(BC.isRISCV()){
+ for (auto CurInstrIt = Instructions.begin(); CurInstrIt != Instructions.end(); ++CurInstrIt) {
+ uint64_t CurOffset = CurInstrIt->first;
+ if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(CurOffset)) continue;
+
+ if(MIB->isBranch(CurInstrIt->second) || MIB->isCall(CurInstrIt->second)) continue;
+ if (MIB->isPseudo(CurInstrIt->second)) continue;
+ if (isZeroPaddingAt(CurInstrIt->first)) break;
+
+ auto NextInstrIt = std::next(CurInstrIt);
+ uint64_t NextOffset = (NextInstrIt != Instructions.end()) ? NextInstrIt->first : getSize();
+ for (auto Itr = Relocations.lower_bound(CurOffset),
+ ItrE = Relocations.lower_bound(NextOffset);
+ Itr != ItrE; ++Itr) {
+ Relocation &Relocation = Itr->second;
+ MCSymbol *Symbol = Relocation.Symbol;
+
+ if (Relocation::isInstructionReference(Relocation.Type)) {
+ uint64_t RefOffset = Relocation.Value - getAddress();
+ LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
+
+ if (LI == InstructionLabels.end()) {
+ Symbol = BC.Ctx->createNamedTempSymbol();
+ InstructionLabels.emplace(RefOffset, Symbol);
+ } else {
+ Symbol = LI->second;
+ }
+ }
+
+ uint64_t Addend = Relocation.Addend;
+
+ // For GOT relocations, create a reference against GOT entry ignoring
+ // the relocation symbol.
+ if (Relocation::isGOT(Relocation.Type)) {
+ assert(Relocation::isPCRelative(Relocation.Type) &&
+ "GOT relocation must be PC-relative on RISC-V");
+ // For RISC-V, we need to find the next instruction
+ // that matches the current instruction's base register.
+ auto NextInstrIt = std::next(CurInstrIt);
+ unsigned CurReg = BC.MIB->getBaseReg(CurInstrIt->second);
+ while (NextInstrIt != Instructions.end()) {
+ MCInst &NextInst = NextInstrIt->second;
+ unsigned NextReg = BC.MIB->getBaseReg(NextInst);
+ // some case there exit extra auipc instruction
+ // like auipc+auipc+ld instruction,so we need skip it
+ if(CurReg == NextReg && !BC.MIB->matchGotAuipcPair(NextInst)) {
+ break;
+ }
+ if(CurReg == NextReg && BC.MIB->matchGotAuipcPair(NextInst)){
+
+ int64_t CurImm = 0;
+ for (const MCOperand &Op : CurInstrIt->second) {
+ if (Op.isImm()) {
+ CurImm = Op.getImm();
+ break;
+ }
+ }
+ int64_t NextImm = 0;
+ for (const MCOperand &Op : NextInstrIt->second) {
+ if (Op.isImm()) {
+ NextImm = Op.getImm();
+ break;
+ }
+ }
+ Relocation.Value = (CurImm << 12) + NextImm;
+ break;
+ }
+ NextInstrIt = std::next(NextInstrIt);
+ }
+ Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+ Addend = Relocation.Value + Relocation.Offset + getAddress();
+
+ }else if (!Relocation::isInstructionReference(Relocation.Type)) {
+ continue;
+ }
+ int64_t Value = Relocation.Value;
+ const bool Result = BC.MIB->replaceImmWithSymbolRef(
+ CurInstrIt->second, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
+ (void)Result;
+ assert(Result && "cannot replace immediate with relocation");
+
+ }
+ }
+ }
for (auto [Offset, Label] : InstructionLabels) {
InstrMapType::iterator II = Instructions.find(Offset);
@@ -1521,7 +1596,6 @@ Error BinaryFunction::disassemble() {
BC.MIB->setInstLabel(II->second, Label);
}
-
// Reset symbolizer for the disassembler.
BC.SymbolicDisAsm->setSymbolizer(nullptr);
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 10b4913b6ab7f..d6af7e6ef9b8b 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -339,6 +339,27 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
}
}
+ unsigned getBaseReg(const MCInst &Inst) const override{
+ switch (Inst.getOpcode()) {
+ default:
+ return 0;
+ case RISCV::AUIPC:
+ return Inst.getOperand(0).getReg();
+ case RISCV::ADDI:
+ case RISCV::LD:
+ return Inst.getOperand(1).getReg();
+ }
+ }
+
+ bool matchGotAuipcPair(const MCInst &Inst) const override{
+ return Inst.getOpcode() == RISCV::ADDI ||
+ Inst.getOpcode() == RISCV::LD ||
+ Inst.getOpcode() == RISCV::C_JAL ||
+ Inst.getOpcode() == RISCV::C_BEQZ ||
+ Inst.getOpcode() == RISCV::C_BNEZ;
+ }
+
+
const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
auto *RISCVExpr = dyn_cast<MCSpecifierExpr>(Expr);
if (RISCVExpr && RISCVExpr->getSubExpr())
diff --git a/bolt/test/RISCV/reloc-got.s b/bolt/test/RISCV/reloc-got.s
index e7e85fddfb1cb..a9049ccf21064 100644
--- a/bolt/test/RISCV/reloc-got.s
+++ b/bolt/test/RISCV/reloc-got.s
@@ -8,16 +8,26 @@
d:
.dword 0
+ .globl e
+ .p2align 3
+e:
+ .dword 0
+
.text
.globl _start
.p2align 1
// CHECK: Binary Function "_start" after building cfg {
_start:
nop // Here to not make the _start and .Ltmp0 symbols coincide
-// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
-// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+ // CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
+ // CHECK: auipc t1, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp1
+ // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+ // CHECK-NEXT: ld t1, %pcrel_lo(.Ltmp1)(t1)
1:
auipc t0, %got_pcrel_hi(d)
+2:
+ auipc t1, %got_pcrel_hi(e)
ld t0, %pcrel_lo(1b)(t0)
+ ld t1, %pcrel_lo(2b)(t1)
ret
- .size _start, .-_start
+ .size _start, .-_start
\ No newline at end of file
More information about the llvm-commits
mailing list