[llvm] [BOLT][RISCV]Fix handling of GOT relocation pairs (PR #149658)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 5 07:12:39 PDT 2025


https://github.com/dinyy updated https://github.com/llvm/llvm-project/pull/149658

>From a54f1449c5b3ec65a7d036138616663d7b428cfa Mon Sep 17 00:00:00 2001
From: yjn <1076891326 at qq.com>
Date: Sat, 19 Jul 2025 23:38:47 +0800
Subject: [PATCH] [BOLT][RISCV]fix up GOT Relocation Handling

---
 bolt/include/bolt/Core/MCPlusBuilder.h       |  14 +++
 bolt/lib/Core/BinaryFunction.cpp             | 102 ++++++++++++++++---
 bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp |  18 ++++
 bolt/test/RISCV/reloc-got.s                  |  16 ++-
 4 files changed, 133 insertions(+), 17 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f902a8c43cd1d..108bf1528d886 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -839,6 +839,20 @@ class MCPlusBuilder {
     return StringRef();
   }
 
+  /// Returns the base register used by \p Inst; the default is unimplemented.
+  virtual unsigned getBaseReg(const MCInst &Inst) const{
+    llvm_unreachable("not implemented");
+    return 0;
+  }
+
+  /// Returns true if \p Inst can be the second instruction of a GOT-load pair:
+  /// an AUIPC (loading the high part of the address) followed by an
+  /// instruction that consumes the low part. The default is unimplemented.
+  virtual bool matchGotAuipcPair(const MCInst &Inst) const{
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   /// Interface and basic functionality of a MCInstMatcher. The idea is to make
   /// it easy to match one or more MCInsts against a tree-like pattern and
   /// extract the fragment operands. Example:
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index eec68ff5a5fce..a4dab59a18c85 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1457,7 +1457,7 @@ Error BinaryFunction::disassemble() {
         if (BC.isAArch64())
           handleAArch64IndirectCall(Instruction, Offset);
       }
-    } else if (BC.isRISCV()) {
+    }else if (BC.isRISCV()) {
       // Check if there's a relocation associated with this instruction.
       for (auto Itr = Relocations.lower_bound(Offset),
                 ItrE = Relocations.lower_bound(Offset + Size);
@@ -1466,15 +1466,7 @@ Error BinaryFunction::disassemble() {
         MCSymbol *Symbol = Relocation.Symbol;
 
         if (Relocation::isInstructionReference(Relocation.Type)) {
-          uint64_t RefOffset = Relocation.Value - getAddress();
-          LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
-
-          if (LI == InstructionLabels.end()) {
-            Symbol = BC.Ctx->createNamedTempSymbol();
-            InstructionLabels.emplace(RefOffset, Symbol);
-          } else {
-            Symbol = LI->second;
-          }
+          continue;
         }
 
         uint64_t Addend = Relocation.Addend;
@@ -1484,8 +1476,7 @@ Error BinaryFunction::disassemble() {
         if (Relocation::isGOT(Relocation.Type)) {
           assert(Relocation::isPCRelative(Relocation.Type) &&
                  "GOT relocation must be PC-relative on RISC-V");
-          Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
-          Addend = Relocation.Value + Relocation.Offset + getAddress();
+          continue;
         }
         int64_t Value = Relocation.Value;
         const bool Result = BC.MIB->replaceImmWithSymbolRef(
@@ -1493,7 +1484,7 @@ Error BinaryFunction::disassemble() {
         (void)Result;
         assert(Result && "cannot replace immediate with relocation");
       }
-    }
+    } 
 
 add_instruction:
     if (getDWARFLineTable()) {
@@ -1514,6 +1505,90 @@ Error BinaryFunction::disassemble() {
 
     addInstruction(Offset, std::move(Instruction));
   }
+    if(BC.isRISCV()){
+    for (auto CurInstrIt = Instructions.begin(); CurInstrIt != Instructions.end(); ++CurInstrIt) {
+      uint64_t CurOffset = CurInstrIt->first; 
+      if (const size_t DataInCodeSize = getSizeOfDataInCodeAt(CurOffset)) continue;
+      
+      if(MIB->isBranch(CurInstrIt->second) || MIB->isCall(CurInstrIt->second)) continue;
+      if (MIB->isPseudo(CurInstrIt->second)) continue;
+      if (isZeroPaddingAt(CurInstrIt->first)) break;
+
+      auto NextInstrIt = std::next(CurInstrIt);
+      uint64_t NextOffset = (NextInstrIt != Instructions.end()) ? NextInstrIt->first : getSize();
+      for (auto Itr = Relocations.lower_bound(CurOffset),
+                ItrE = Relocations.lower_bound(NextOffset);
+           Itr != ItrE; ++Itr) {
+        Relocation &Relocation = Itr->second;
+        MCSymbol *Symbol = Relocation.Symbol;
+
+        if (Relocation::isInstructionReference(Relocation.Type)) {
+          uint64_t RefOffset = Relocation.Value - getAddress();
+          LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);
+
+          if (LI == InstructionLabels.end()) {
+            Symbol = BC.Ctx->createNamedTempSymbol();
+            InstructionLabels.emplace(RefOffset, Symbol);
+          } else {
+            Symbol = LI->second;
+          }
+        }
+
+        uint64_t Addend = Relocation.Addend;
+
+        // For GOT relocations, create a reference against GOT entry ignoring
+        // the relocation symbol.
+        if (Relocation::isGOT(Relocation.Type)) {
+          assert(Relocation::isPCRelative(Relocation.Type) &&
+                 "GOT relocation must be PC-relative on RISC-V");
+          // For RISC-V, we need to find the next instruction
+          // that matches the current instruction's base register.
+          auto NextInstrIt = std::next(CurInstrIt);
+          unsigned CurReg = BC.MIB->getBaseReg(CurInstrIt->second);
+          while (NextInstrIt != Instructions.end()) {
+              MCInst &NextInst = NextInstrIt->second;
+              unsigned NextReg = BC.MIB->getBaseReg(NextInst);
+              // In some cases there are extra AUIPC instructions in between,
+              // e.g. an auipc+auipc+ld sequence, so we need to skip them.
+              if(CurReg == NextReg && !BC.MIB->matchGotAuipcPair(NextInst)) {
+                break;
+              }
+              if(CurReg == NextReg && BC.MIB->matchGotAuipcPair(NextInst)){
+
+                int64_t CurImm = 0;
+                for (const MCOperand &Op : CurInstrIt->second) {
+                    if (Op.isImm()) {
+                        CurImm = Op.getImm();
+                        break; 
+                    }
+                }
+                int64_t NextImm = 0;
+                for (const MCOperand &Op : NextInstrIt->second) {
+                    if (Op.isImm()) {
+                        NextImm = Op.getImm();
+                        break; 
+                    }
+                }
+                Relocation.Value = (CurImm << 12) + NextImm;
+                break; 
+              }
+              NextInstrIt = std::next(NextInstrIt);
+          }
+          Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+          Addend = Relocation.Value + Relocation.Offset + getAddress();
+
+        }else if (!Relocation::isInstructionReference(Relocation.Type)) {
+          continue;
+        }
+          int64_t Value = Relocation.Value;
+          const bool Result = BC.MIB->replaceImmWithSymbolRef(
+              CurInstrIt->second, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
+          (void)Result;
+          assert(Result && "cannot replace immediate with relocation");
+
+      }  
+    }
+  }
 
   for (auto [Offset, Label] : InstructionLabels) {
     InstrMapType::iterator II = Instructions.find(Offset);
@@ -1521,7 +1596,6 @@ Error BinaryFunction::disassemble() {
 
     BC.MIB->setInstLabel(II->second, Label);
   }
-
   // Reset symbolizer for the disassembler.
   BC.SymbolicDisAsm->setSymbolizer(nullptr);
 
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
index 10b4913b6ab7f..9bebe59b400aa 100644
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
@@ -339,6 +339,24 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
     }
   }
 
+  unsigned getBaseReg(const MCInst &Inst) const override{
+    switch (Inst.getOpcode()) {
+      default:
+        return 0; // any other opcode: report register 0
+      case RISCV::AUIPC:
+        return Inst.getOperand(0).getReg(); // presumably rd -- TODO confirm
+      case RISCV::ADDI:
+      case RISCV::LD:
+        return Inst.getOperand(1).getReg(); // presumably rs1 -- TODO confirm
+    }
+  }
+
+  bool matchGotAuipcPair(const MCInst &Inst) const override{
+    return Inst.getOpcode() == RISCV::ADDI || // only ADDI and LD are accepted
+           Inst.getOpcode() == RISCV::LD ;
+  }
+
+
   const MCSymbol *getTargetSymbol(const MCExpr *Expr) const override {
     auto *RISCVExpr = dyn_cast<MCSpecifierExpr>(Expr);
     if (RISCVExpr && RISCVExpr->getSubExpr())
diff --git a/bolt/test/RISCV/reloc-got.s b/bolt/test/RISCV/reloc-got.s
index e7e85fddfb1cb..a9049ccf21064 100644
--- a/bolt/test/RISCV/reloc-got.s
+++ b/bolt/test/RISCV/reloc-got.s
@@ -8,16 +8,26 @@
 d:
   .dword 0
 
+  .globl e
+  .p2align 3
+e:
+  .dword 0
+
   .text
   .globl _start
   .p2align 1
 // CHECK: Binary Function "_start" after building cfg {
 _start:
   nop // Here to not make the _start and .Ltmp0 symbols coincide
-// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
-// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+      // CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
+      // CHECK: auipc t1, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp1
+      // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
+      // CHECK-NEXT: ld t1, %pcrel_lo(.Ltmp1)(t1)
 1:
   auipc t0, %got_pcrel_hi(d)
+2:
+  auipc t1, %got_pcrel_hi(e)
   ld t0, %pcrel_lo(1b)(t0)
+  ld t1, %pcrel_lo(2b)(t1)
   ret
-  .size _start, .-_start
+  .size _start, .-_start
\ No newline at end of file



More information about the llvm-commits mailing list