[llvm] [BOLT][RISCV]Fix handling of GOT relocation pairs (PR #149658)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 5 06:20:18 PDT 2025


https://github.com/dinyy updated https://github.com/llvm/llvm-project/pull/149658

>From 9f0a26a20f98d314a5a03ec71f836b1f0052fe37 Mon Sep 17 00:00:00 2001
From: yjn <1076891326 at qq.com>
Date: Sat, 19 Jul 2025 23:38:47 +0800
Subject: [PATCH] [BOLT][RISCV]fix up GOT Relocation Handling

---
 bolt/include/bolt/Core/MCPlusBuilder.h       |  31 +++++-
 bolt/lib/Core/BinaryFunction.cpp             | 102 ++++++++++++++++---
 bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp |  36 +++++++
 bolt/test/RISCV/reloc-got.s                  |  18 +++-
 4 files changed, 168 insertions(+), 19 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f902a8c43cd1d..a4d8c4c0d1f64 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -192,6 +192,12 @@ class MCPlusBuilder {
 
     SrcInst.erase(AnnotationOp, SrcInst.end());
   }
+  /// Copy annotations from \p SrcInst to \p DstInst.
+  void copyAnnotations (MCInst &SrcInst, MCInst &DstInst) const {
+    MCInst::iterator AnnotationOp = getAnnotationInstOp(SrcInst);
+    for (MCInst::iterator Iter = AnnotationOp; Iter != SrcInst.end(); ++Iter)
+      DstInst.addOperand(*Iter);
+  }
 
   /// Return iterator range covering def operands.
   iterator_range<MCInst::iterator> defOperands(MCInst &Inst) const {
@@ -839,6 +845,29 @@ class MCPlusBuilder {
     return StringRef();
   }
 
+  /// Returns the base register used by the instruction.
+  virtual unsigned getBaseReg(const MCInst &Inst) const{
+    llvm_unreachable("not implemented");
+    return 0;
+  }
+
+  /// Matches a pair of instructions that implement a GOT load:
+  /// an AUIPC (loading the high part of the address)
+  /// followed by a GOT-loading instruction (loading the low part of the address).
+  virtual bool matchGotAuipcPair(const MCInst &Inst) const{
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  /// Try to find a symbol referenced by a PC-relative LO (low 12 bits)
+  //  relocation in the instruction.
+  virtual const MCSymbol *getPCRelLoSymbol(const MCInst &Inst) const{
+    llvm_unreachable("not implemented");
+    return nullptr;
+  }
+
+
+
   /// Interface and basic functionality of a MCInstMatcher. The idea is to make
   /// it easy to match one or more MCInsts against a tree-like pattern and
   /// extract the fragment operands. Example:
@@ -2303,7 +2332,7 @@ class MCPlusBuilder {
     // We have to use at least 2-byte alignment for functions because of C++
     // ABI.
     return 2;
-  }
+  }  
 
   // AliasMap caches a mapping of registers to the set of registers that
   // alias (are sub or superregs of itself, including itself).
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..a4dab59a18c85 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1457,7 +1457,7 @@ Error BinaryFunction::disassemble() {
         if (BC.isAArch64())
           handleAArch64IndirectCall(Instruction, Offset);
       }
-    } else if (BC.isRISCV()) {
+    }else if (BC.isRISCV()) {
       // Check if there's a relocation associated with this instruction.
       for (auto Itr = Relocations.lower_bound(Offset),
                 ItrE = Relocations.lower_bound(Offset + Size);
@@ -1466,15 +1466,7 @@ Error BinaryFunction::disassemble() {
         MCSymbol *Symbol = Relocation.Symbol;
 
         if (Relocation::isInstructionReference(Relocation.Type)) {
-          uint64_t RefOffset = Relocation.Value - getAddress();
-          LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
-
-          if (LI == InstructionLabels.end()) {
-            Symbol = BC.Ctx->createNamedTempSymbol();
-            InstructionLabels.emplace(RefOffset, Symbol);
-          } else {
-            Symbol = LI->second;
-          }
+          continue;
         }
 
         uint64_t Addend = Relocation.Addend;
@@ -1484,8 +1476,7 @@ Error BinaryFunction::disassemble() {
         if (Relocation::isGOT(Relocation.Type)) {
           assert(Relocation::isPCRelative(Relocation.Type) &&
                  "GOT relocation must be PC-relative on RISC-V");
-          Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
-          Addend = Relocation.Value + Relocation.Offset + getAddress();
+          continue;
         }
         int64_t Value = Relocation.Value;
         const bool Result = BC.MIB->replaceImmWithSymbolRef(
@@ -1493,7 +1484,7 @@ Error BinaryFunction::disassemble() {
         (void)Result;
         assert(Result && "cannot replace immediate with relocation");
       }
-    }
+    } 
 
 add_instruction:
     if (getDWARFLineTable()) {
@@ -1514,6 +1505,90 @@ Error BinaryFunction::disassemble() {
 
     addInstruction(Offset, std::move(Instruction));
   }
+    if(BC.isRISCV()){
+    for (auto CurInstrIt = Instructions.begin(); CurInstrIt != Instructions.end(); ++CurInstrIt) {
+      uint64_t CurOffset = CurInstrIt->first; 
+      if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(CurOffset)) continue;
+      
+      if(MIB->isBranch(CurInstrIt->second) || MIB->isCall(CurInstrIt->second)) continue;
+      if (MIB->isPseudo(CurInstrIt->second)) continue;
+      if (isZeroPaddingAt(CurInstrIt->first)) break;
+
+      auto NextInstrIt = std::next(CurInstrIt);
+      uint64_t NextOffset = (NextInstrIt != Instructions.end()) ? NextInstrIt->first : getSize();
+      for (auto Itr = Relocations.lower_bound(CurOffset),
+                ItrE = Relocations.lower_bound(NextOffset);
+           Itr != ItrE; ++Itr) {
+        Relocation &Relocation = Itr->second;
+        MCSymbol *Symbol = Relocation.Symbol;
+
+        if (Relocation::isInstructionReference(Relocation.Type)) {
+          uint64_t RefOffset = Relocation.Value - getAddress();
+          LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
+
+          if (LI == InstructionLabels.end()) {
+            Symbol = BC.Ctx->createNamedTempSymbol();
+            InstructionLabels.emplace(RefOffset, Symbol);
+          } else {
+            Symbol = LI->second;
+          }
+        }
+
+        uint64_t Addend = Relocation.Addend;
+
+        // For GOT relocations, create a reference against GOT entry ignoring
+        // the relocation symbol.
+        if (Relocation::isGOT(Relocation.Type)) {
+          assert(Relocation::isPCRelative(Relocation.Type) &&
+                 "GOT relocation must be PC-relative on RISC-V");
+          // For RISC-V, we need to find the next instruction
+          // that matches the current instruction's base register.
+          auto NextInstrIt = std::next(CurInstrIt);
+          unsigned CurReg = BC.MIB->getBaseReg(CurInstrIt->second);
+          while (NextInstrIt != Instructions.end()) {
+              MCInst &NextInst = NextInstrIt->second;
+              unsigned NextReg = BC.MIB->getBaseReg(NextInst);
+              // some case there exit extra auipc instruction
+              // like auipc+auipc+ld instruction,so we need skip it
+              if(CurReg == NextReg && !BC.MIB->matchGotAuipcPair(NextInst)) {
+                break;
+              }
+              if(CurReg == NextReg && BC.MIB->matchGotAuipcPair(NextInst)){
+
+                int64_t CurImm = 0;
+                for (const MCOperand &Op : CurInstrIt->second) {
+                    if (Op.isImm()) {
+                        CurImm = Op.getImm();
+                        break; 
+                    }
+                }
+                int64_t NextImm = 0;
+                for (const MCOperand &Op : NextInstrIt->second) {
+                    if (Op.isImm()) {
+                        NextImm = Op.getImm();
+                        break; 
+                    }
+                }
+                Relocation.Value = (CurImm << 12) + NextImm;
+                break; 
+              }
+              NextInstrIt = std::next(NextInstrIt);
+          }
+          Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+          Addend = Relocation.Value + Relocation.Offset + getAddress();
+
+        }else if (!Relocation::isInstructionReference(Relocation.Type)) {
+          continue;
+        }
+          int64_t Value = Relocation.Value;
+          const bool Result = BC.MIB->replaceImmWithSymbolRef(
+              CurInstrIt->second, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
+          (void)Result;
+          assert(Result && "cannot replace immediate with relocation");
+
+      }  
+    }
+  }
 
   for (auto [Offset, Label] : InstructionLabels) {
     InstrMapType::iterator II = Instructions.find(Offset);
@@ -1521,7 +1596,6 @@ Error BinaryFunction::disassemble() {
 
     BC.MIB->setInstLabel(II->second, Label);
   }
-
   // Reset symbolizer for the disassembler.
   BC.SymbolicDisAsm->setSymbolizer(nullptr);
 
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 10b4913b6ab7f..2fdac8a3d0ff2 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -339,6 +339,42 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  unsigned getBaseReg(const MCInst &Inst) const override{
+    switch (Inst.getOpcode()) {
+      default:
+        return 0;
+      case RISCV::AUIPC:
+        return Inst.getOperand(0).getReg();
+      case RISCV::ADDI:
+      case RISCV::LD:
+        return Inst.getOperand(1).getReg();
+    }
+  }
+
+  bool matchGotAuipcPair(const MCInst &Inst) const override{
+    return Inst.getOpcode() == RISCV::ADDI ||
+           Inst.getOpcode() == RISCV::LD ||
+           Inst.getOpcode() == RISCV::C_JAL ||
+           Inst.getOpcode() == RISCV::C_BEQZ ||
+           Inst.getOpcode() == RISCV::C_BNEZ;
+  }
+
+
+const MCSymbol *getPCRelLoSymbol(const MCInst &Inst) const override {
+  for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+    const auto &Op = Inst.getOperand(i);
+    if (!Op.isExpr())
+      continue;
+    const MCExpr *Expr = Op.getExpr();
+    auto *Spec = llvm::dyn_cast<MCSpecifierExpr>(Expr);
+    if (Spec && Spec->getSpecifier() == RISCV::S_PCREL_LO) {
+      return getTargetSymbol(Spec->getSubExpr());
+    }
+  }
+  return nullptr;
+}
+
+
   const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
     auto *RISCVExpr = dyn_cast<MCSpecifierExpr>(Expr);
     if (RISCVExpr && RISCVExpr->getSubExpr())
diff --git a/bolt/test/RISCV/reloc-got.s b/bolt/test/RISCV/reloc-got.s
index e7e85fddfb1cb..7c8c7c81238be 100644
--- a/bolt/test/RISCV/reloc-got.s
+++ b/bolt/test/RISCV/reloc-got.s
@@ -1,5 +1,5 @@
 // RUN: %clang %cflags -o %t %s
-// RUN: llvm-bolt --print-cfg --print-only=_start -o %t.null %t \
+// RUN: llvm-bolt --print-finalized --print-only=_start -o %t.null %t \
 // RUN:    | FileCheck %s
 
   .data
@@ -8,16 +8,26 @@
 d:
   .dword 0
 
+  .globl e
+  .p2align 3
+e:
+  .dword 0
+
   .text
   .globl _start
   .p2align 1
 // CHECK: Binary Function "_start" after building cfg {
 _start:
   nop // Here to not make the _start and .Ltmp0 symbols coincide
-// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
-// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+      // CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
+      // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+      // CHECK: auipc t1, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp1
+      // CHECK-NEXT: ld t1, %pcrel_lo(.Ltmp1)(t1)
 1:
   auipc t0, %got_pcrel_hi(d)
+2:
+  auipc t1, %got_pcrel_hi(e)
   ld t0, %pcrel_lo(1b)(t0)
+  ld t1, %pcrel_lo(2b)(t1)
   ret
-  .size _start, .-_start
+  .size _start, .-_start
\ No newline at end of file



More information about the llvm-commits mailing list