[lld] [ELF] Add target-specific relocation scanning for RISC-V (PR #181332)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 21:51:23 PST 2026


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/181332

>From 5a3f8ba98f223e452656d8e0b089dcf652d0e423 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 12 Feb 2026 00:26:01 -0800
Subject: [PATCH] [ELF] Add target-specific relocation scanning for RISC-V

Implement RISCV::scanSectionImpl, following the pattern established for
x86 (#178846). This merges the getRelExpr and TLS handling for SHF_ALLOC
sections into the target-specific scanner, enabling devirtualization and
eliminating abstraction overhead.

- Use processR_PC/processR_PLT_PC for common PC-relative and PLT
  relocations.
- Handle TLS relocations directly and remove RISC-V-specific checks from
  handleTlsRelocation.
- Simplify getRelExpr to only handle relocations needed by
  relocateNonAlloc and relocateEh.

Move scanSection after getRelExpr and before relocate, similar to x86.
---
 lld/ELF/Arch/RISCV.cpp  | 355 +++++++++++++++++++++++++---------------
 lld/ELF/Relocations.cpp |  23 +--
 2 files changed, 229 insertions(+), 149 deletions(-)

diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 597ab602d3bd0..78ebd7054ab06 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -279,17 +279,14 @@ RelType RISCV::getDynRel(RelType type) const {
                                          : static_cast<RelType>(R_RISCV_NONE);
 }
 
+// Only needed to support relocations used by relocateNonAlloc and relocateEh.
 RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
                           const uint8_t *loc) const {
   switch (type) {
   case R_RISCV_NONE:
-  case R_RISCV_VENDOR:
     return R_NONE;
   case R_RISCV_32:
   case R_RISCV_64:
-  case R_RISCV_HI20:
-  case R_RISCV_LO12_I:
-  case R_RISCV_LO12_S:
     return R_ABS;
   case R_RISCV_ADD8:
   case R_RISCV_ADD16:
@@ -305,59 +302,201 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
   case R_RISCV_SUB32:
   case R_RISCV_SUB64:
     return RE_RISCV_ADD;
-  case R_RISCV_JAL:
-  case R_RISCV_BRANCH:
-  case R_RISCV_PCREL_HI20:
-  case R_RISCV_RVC_BRANCH:
-  case R_RISCV_RVC_JUMP:
   case R_RISCV_32_PCREL:
     return R_PC;
-  case R_RISCV_CALL:
-  case R_RISCV_CALL_PLT:
-  case R_RISCV_PLT32:
-    return R_PLT_PC;
-  case R_RISCV_GOT_HI20:
-  case R_RISCV_GOT32_PCREL:
-    return R_GOT_PC;
-  case R_RISCV_PCREL_LO12_I:
-  case R_RISCV_PCREL_LO12_S:
-    return RE_RISCV_PC_INDIRECT;
-  case R_RISCV_TLSDESC_HI20:
-  case R_RISCV_TLSDESC_LOAD_LO12:
-  case R_RISCV_TLSDESC_ADD_LO12:
-    return R_TLSDESC_PC;
-  case R_RISCV_TLSDESC_CALL:
-    return R_TLSDESC_CALL;
-  case R_RISCV_TLS_GD_HI20:
-    return R_TLSGD_PC;
-  case R_RISCV_TLS_GOT_HI20:
-    return R_GOT_PC;
-  case R_RISCV_TPREL_HI20:
-  case R_RISCV_TPREL_LO12_I:
-  case R_RISCV_TPREL_LO12_S:
-    return R_TPREL;
-  case R_RISCV_ALIGN:
-    return R_RELAX_HINT;
-  case R_RISCV_TPREL_ADD:
-  case R_RISCV_RELAX:
-    return ctx.arg.relax ? R_RELAX_HINT : R_NONE;
   case R_RISCV_SET_ULEB128:
   case R_RISCV_SUB_ULEB128:
     return RE_RISCV_LEB128;
   default:
-    if (type.v & INTERNAL_RISCV_VENDOR_MASK) {
-      Err(ctx) << getErrorLoc(ctx, loc)
-               << "unsupported vendor-specific relocation " << type
-               << " against symbol " << &s;
-      return R_NONE;
-    }
-    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation ("
-             << (type.v & ~INTERNAL_RISCV_VENDOR_MASK) << ") against symbol "
-             << &s;
+    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
+             << ") against symbol " << &s;
     return R_NONE;
   }
 }
 
+template <class ELFT, class RelTy>
+void RISCV::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
+  RelocScan rs(ctx, &sec);
+  // Many relocations end up in sec.relocations.
+  sec.relocations.reserve(rels.size());
+
+  StringRef rvVendor;
+  for (auto it = rels.begin(); it != rels.end(); ++it) {
+    RelType type = it->getType(false);
+    uint32_t symIndex = it->getSymbol(false);
+    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIndex);
+    uint64_t offset = it->r_offset;
+
+    if (type == R_RISCV_VENDOR) {
+      if (!rvVendor.empty())
+        Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+                 << "malformed consecutive R_RISCV_VENDOR relocations";
+      rvVendor = sym.getName();
+      continue;
+    } else if (!rvVendor.empty()) {
+      uint32_t VendorFlag = getRISCVVendorRelMarker(rvVendor);
+      if (!VendorFlag) {
+        Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+                 << "unknown vendor-specific relocation (" << type.v
+                 << ") in namespace '" << rvVendor << "' against symbol '"
+                 << &sym << "'";
+        rvVendor = "";
+        continue;
+      }
+
+      rvVendor = "";
+      assert((type.v < 256) && "Out of range relocation detected!");
+      type.v |= VendorFlag;
+    }
+
+    if (sym.isUndefined() && symIndex != 0 &&
+        rs.maybeReportUndefined(cast<Undefined>(sym), offset))
+      continue;
+    int64_t addend = rs.getAddend<ELFT>(*it, type);
+    RelExpr expr;
+    // Relocation types that only need a RelExpr set `expr` and break out of
+    // the switch to reach rs.process(). Types that need special handling
+    // (fast-path helpers, TLS) call a handler and use `continue`.
+    switch (type) {
+    case R_RISCV_NONE:
+      continue;
+
+    // Absolute relocations:
+    case R_RISCV_32:
+    case R_RISCV_64:
+    case R_RISCV_HI20:
+    case R_RISCV_LO12_I:
+    case R_RISCV_LO12_S:
+      expr = R_ABS;
+      break;
+
+    // ADD/SET/SUB:
+    case R_RISCV_ADD8:
+    case R_RISCV_ADD16:
+    case R_RISCV_ADD32:
+    case R_RISCV_ADD64:
+    case R_RISCV_SET6:
+    case R_RISCV_SET8:
+    case R_RISCV_SET16:
+    case R_RISCV_SET32:
+    case R_RISCV_SUB6:
+    case R_RISCV_SUB8:
+    case R_RISCV_SUB16:
+    case R_RISCV_SUB32:
+    case R_RISCV_SUB64:
+      expr = RE_RISCV_ADD;
+      break;
+
+    // PC-relative:
+    case R_RISCV_JAL:
+    case R_RISCV_BRANCH:
+    case R_RISCV_PCREL_HI20:
+    case R_RISCV_RVC_BRANCH:
+    case R_RISCV_RVC_JUMP:
+    case R_RISCV_32_PCREL:
+      rs.processR_PC(type, offset, addend, sym);
+      continue;
+    case R_RISCV_PCREL_LO12_I:
+    case R_RISCV_PCREL_LO12_S:
+      expr = RE_RISCV_PC_INDIRECT;
+      break;
+
+    // GOT-generating relocations:
+    case R_RISCV_GOT_HI20:
+    case R_RISCV_GOT32_PCREL:
+      expr = R_GOT_PC;
+      break;
+
+    // PLT-generating relocations:
+    case R_RISCV_CALL:
+    case R_RISCV_CALL_PLT:
+    case R_RISCV_PLT32:
+      rs.processR_PLT_PC(type, offset, addend, sym);
+      continue;
+
+    // TLS relocations:
+    case R_RISCV_TPREL_HI20:
+    case R_RISCV_TPREL_LO12_I:
+    case R_RISCV_TPREL_LO12_S:
+      if (rs.checkTlsLe(offset, sym, type))
+        continue;
+      expr = R_TPREL;
+      break;
+    case R_RISCV_TLS_GOT_HI20:
+      // There is no IE to LE optimization.
+      ctx.hasTlsIe.store(true, std::memory_order_relaxed);
+      sym.setFlags(NEEDS_TLSIE);
+      sec.addReloc({R_GOT_PC, type, offset, addend, &sym});
+      continue;
+    case R_RISCV_TLS_GD_HI20:
+      // There is no GD to IE/LE optimization.
+      sym.setFlags(NEEDS_TLSGD);
+      sec.addReloc({R_TLSGD_PC, type, offset, addend, &sym});
+      continue;
+
+    // TLSDESC relocations:
+    case R_RISCV_TLSDESC_HI20:
+      rs.handleTlsDesc(R_TLSDESC_PC, R_GOT_PC, type, offset, addend, sym);
+      continue;
+    case R_RISCV_TLSDESC_LOAD_LO12:
+    case R_RISCV_TLSDESC_ADD_LO12:
+      // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} reference a label, not the
+      // TLS symbol, so we cannot use handleTlsDesc (which sets NEEDS_TLSDESC).
+      // For TLSDESC->IE, use R_TPREL as well, but relocateAlloc uses isToLe
+      // (from HI20) to select the correct transform.
+      sec.addReloc({ctx.arg.shared ? R_TLSDESC_PC : R_TPREL, type, offset,
+                    addend, &sym});
+      continue;
+    case R_RISCV_TLSDESC_CALL:
+      if (!ctx.arg.shared)
+        sec.addReloc({R_TPREL, type, offset, addend, &sym});
+      continue;
+
+    // Relaxation hints:
+    case R_RISCV_ALIGN:
+      sec.addReloc({R_RELAX_HINT, type, offset, addend, &sym});
+      continue;
+    case R_RISCV_TPREL_ADD:
+    case R_RISCV_RELAX:
+      if (ctx.arg.relax)
+        sec.addReloc({R_RELAX_HINT, type, offset, addend, &sym});
+      continue;
+
+    case R_RISCV_SET_ULEB128:
+    case R_RISCV_SUB_ULEB128:
+      expr = RE_RISCV_LEB128;
+      break;
+
+    default:
+      if (type.v & INTERNAL_RISCV_VENDOR_MASK) {
+        Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+                 << "unsupported vendor-specific relocation " << type
+                 << " against symbol " << &sym;
+        continue;
+      }
+      Err(ctx) << getErrorLoc(ctx, sec.content().data() + offset)
+               << "unknown relocation (" << type.v << ") against symbol "
+               << &sym;
+      continue;
+    }
+    rs.process(expr, type, offset, sym, addend);
+  }
+
+  // Sort relocations by offset for more efficient searching for
+  // R_RISCV_PCREL_HI20.
+  llvm::stable_sort(sec.relocs(),
+                    [](const Relocation &lhs, const Relocation &rhs) {
+                      return lhs.offset < rhs.offset;
+                    });
+}
+
+void RISCV::scanSection(InputSectionBase &sec) {
+  if (ctx.arg.is64)
+    elf::scanSection1<RISCV, ELF64LE>(*this, sec);
+  else
+    elf::scanSection1<RISCV, ELF32LE>(*this, sec);
+}
+
 void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
   const unsigned bits = ctx.arg.wordsize * 8;
 
@@ -637,43 +776,48 @@ void RISCV::relocateAlloc(InputSection &sec, uint8_t *buf) const {
       else
         val = tlsdescVal;
       break;
-    case R_RELAX_TLS_GD_TO_IE:
-      // Only R_RISCV_TLSDESC_HI20 reaches here. tlsdescVal will be finalized
-      // after we see R_RISCV_TLSDESC_ADD_LO12 in the R_RELAX_TLS_GD_TO_LE case.
-      // The net effect is that tlsdescVal will be smaller than `val` to take
-      // into account of NOP instructions (in the absence of R_RISCV_RELAX)
-      // before AUIPC.
-      tlsdescVal = val + rel.offset;
-      isToLe = false;
-      tlsdescRelax = relaxable(relocs, i);
-      if (!tlsdescRelax)
-        tlsdescToIe(ctx, loc, rel, val);
-      continue;
-    case R_RELAX_TLS_GD_TO_LE:
-      // See the comment in handleTlsRelocation. For TLSDESC=>IE,
-      // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} also reach here. If isToLe is
-      // false, this is actually TLSDESC=>IE optimization.
+    case R_GOT_PC:
+    case R_TPREL:
+      // TLSDESC->IE/LE: R_GOT_PC is TLSDESC->IE, R_TPREL is TLSDESC->LE.
       if (rel.type == R_RISCV_TLSDESC_HI20) {
-        tlsdescVal = val;
-        isToLe = true;
+        isToLe = rel.expr == R_TPREL;
+        if (isToLe) {
+          tlsdescVal = val;
+        } else {
+          // tlsdescVal will be finalized after we see R_RISCV_TLSDESC_ADD_LO12.
+          // The net effect is that tlsdescVal will be smaller than `val` to
+          // account for NOP instructions (in the absence of R_RISCV_RELAX)
+          // before AUIPC.
+          tlsdescVal = val + rel.offset;
+        }
         tlsdescRelax = relaxable(relocs, i);
-      } else {
+        if (!tlsdescRelax) {
+          if (isToLe)
+            tlsdescToLe(loc, rel, val);
+          else
+            tlsdescToIe(ctx, loc, rel, val);
+        }
+        continue;
+      }
+      if (rel.type == R_RISCV_TLSDESC_LOAD_LO12 ||
+          rel.type == R_RISCV_TLSDESC_ADD_LO12 ||
+          rel.type == R_RISCV_TLSDESC_CALL) {
         if (!isToLe && rel.type == R_RISCV_TLSDESC_ADD_LO12)
           tlsdescVal -= rel.offset;
         val = tlsdescVal;
-      }
-      // When NOP conversion is eligible and relaxation applies, don't write a
-      // NOP in case an unrelated instruction follows the current instruction.
-      if (tlsdescRelax &&
-          (rel.type == R_RISCV_TLSDESC_HI20 ||
-           rel.type == R_RISCV_TLSDESC_LOAD_LO12 ||
-           (rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val))))
+        // When NOP conversion is eligible and relaxation applies, don't write a
+        // NOP in case an unrelated instruction follows the current instruction.
+        if (tlsdescRelax &&
+            (rel.type == R_RISCV_TLSDESC_LOAD_LO12 ||
+             (rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val))))
+          continue;
+        if (isToLe)
+          tlsdescToLe(loc, rel, val);
+        else
+          tlsdescToIe(ctx, loc, rel, val);
         continue;
-      if (isToLe)
-        tlsdescToLe(loc, rel, val);
-      else
-        tlsdescToIe(ctx, loc, rel, val);
-      continue;
+      }
+      break;
     case RE_RISCV_LEB128:
       if (i + 1 < size) {
         const Relocation &rel1 = relocs[i + 1];
@@ -918,7 +1062,7 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
     case R_RISCV_TLSDESC_HI20:
       // For TLSDESC=>LE, we can use the short form if hi20 is zero.
       tlsdescRelax = relaxable(relocs, i);
-      toLeShortForm = tlsdescRelax && r.expr == R_RELAX_TLS_GD_TO_LE &&
+      toLeShortForm = tlsdescRelax && r.expr == R_TPREL &&
                       !hi20(r.sym->getVA(ctx, r.addend));
       [[fallthrough]];
     case R_RISCV_TLSDESC_LOAD_LO12:
@@ -1495,59 +1639,6 @@ void elf::mergeRISCVAttributesSections(Ctx &ctx) {
 
 void elf::setRISCVTargetInfo(Ctx &ctx) { ctx.target.reset(new RISCV(ctx)); }
 
-template <class ELFT, class RelTy>
-void RISCV::scanSectionImpl(InputSectionBase &sec, Relocs<RelTy> rels) {
-  RelocScan rs(ctx, &sec);
-  // Many relocations end up in sec.relocations.
-  sec.relocations.reserve(rels.size());
-
-  StringRef rvVendor;
-  for (auto it = rels.begin(); it != rels.end(); ++it) {
-    RelType type = it->getType(false);
-    uint32_t symIndex = it->getSymbol(false);
-    Symbol &sym = sec.getFile<ELFT>()->getSymbol(symIndex);
-    const uint8_t *loc = sec.content().data() + it->r_offset;
-
-    if (type == R_RISCV_VENDOR) {
-      if (!rvVendor.empty())
-        Err(ctx) << getErrorLoc(ctx, loc)
-                 << "malformed consecutive R_RISCV_VENDOR relocations";
-      rvVendor = sym.getName();
-      continue;
-    } else if (!rvVendor.empty()) {
-      uint32_t VendorFlag = getRISCVVendorRelMarker(rvVendor);
-      if (!VendorFlag) {
-        Err(ctx) << getErrorLoc(ctx, loc)
-                 << "unknown vendor-specific relocation (" << type.v
-                 << ") in namespace '" << rvVendor << "' against symbol '"
-                 << &sym << "'";
-        rvVendor = "";
-        continue;
-      }
-
-      rvVendor = "";
-      assert((type.v < 256) && "Out of range relocation detected!");
-      type.v |= VendorFlag;
-    }
-
-    rs.scan<ELFT, RelTy>(it, type, rs.getAddend<ELFT>(*it, type));
-  }
-
-  // Sort relocations by offset for more efficient searching for
-  // R_RISCV_PCREL_HI20.
-  llvm::stable_sort(sec.relocs(),
-                    [](const Relocation &lhs, const Relocation &rhs) {
-                      return lhs.offset < rhs.offset;
-                    });
-}
-
-void RISCV::scanSection(InputSectionBase &sec) {
-  if (ctx.arg.is64)
-    elf::scanSection1<RISCV, ELF64LE>(*this, sec);
-  else
-    elf::scanSection1<RISCV, ELF32LE>(*this, sec);
-}
-
 uint32_t elf::getRISCVVendorRelMarker(StringRef rvVendor) {
   return StringSwitch<uint32_t>(rvVendor)
       .Case("QUALCOMM", INTERNAL_RISCV_VENDOR_QUALCOMM)
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 797acb08b0506..17d405dccc7d9 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1183,17 +1183,13 @@ unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type,
   if (expr == R_TPREL || expr == R_TPREL_NEG)
     return checkTlsLe(offset, sym, type) ? 1 : 0;
 
-  bool isRISCV = ctx.arg.emachine == EM_RISCV;
-
   if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
             R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) &&
       ctx.arg.shared) {
-    // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
-    // set NEEDS_TLSDESC on the label.
     if (expr != R_TLSDESC_CALL) {
       if (isAArch64)
         sym.setFlags(NEEDS_TLSDESC | NEEDS_TLSDESC_NONAUTH);
-      else if (!isRISCV || type == R_RISCV_TLSDESC_HI20)
+      else
         sym.setFlags(NEEDS_TLSDESC);
       sec->addReloc({expr, type, offset, addend, &sym});
     }
@@ -1209,25 +1205,22 @@ unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type,
        type == R_LARCH_TLS_DESC_LD || type == R_LARCH_TLS_DESC_CALL ||
        type == R_LARCH_TLS_DESC_PCREL20_S2);
 
-  // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
-  // optimizations.
-  // RISC-V supports TLSDESC to IE/LE optimizations.
+  // ARM, Hexagon, and LoongArch do not support GD/LD to IE/LE optimizations.
   // For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
   // optimization as well.
   bool execOptimize =
       !ctx.arg.shared && ctx.arg.emachine != EM_ARM &&
       ctx.arg.emachine != EM_HEXAGON &&
       (ctx.arg.emachine != EM_LOONGARCH || execOptimizeInLoongArch) &&
-      !(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
       !sec->file->ppc64DisableTLSRelax;
 
   // If we are producing an executable and the symbol is non-preemptable, it
   // must be defined and the code sequence can be optimized to use Local-Exec.
   //
-  // ARM and RISC-V do not support any relaxations for TLS relocations, however,
-  // we can omit the DTPMOD dynamic relocations and resolve them at link time
-  // because them are always 1. This may be necessary for static linking as
-  // DTPMOD may not be expected at load time.
+  // While ARM does not have TLS optimizations, we can omit the DTPMOD
+  // dynamic relocations and resolve them at link time because they are
+  // always 1. This may be necessary for static linking as DTPMOD may not be
+  // expected at load time.
   bool isLocalInExecutable = !sym.isPreemptible && !ctx.arg.shared;
 
   // Local Dynamic is for access to module local TLS variables, while still
@@ -1290,10 +1283,6 @@ unsigned RelocScan::handleTlsRelocation(RelExpr expr, RelType type,
 
     // Global-Dynamic/TLSDESC can be optimized to Initial-Exec or Local-Exec
     // depending on the symbol being locally defined or not.
-    //
-    // R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a non-preemptible
-    // label, so TLSDESC=>IE will be categorized as R_RELAX_TLS_GD_TO_LE. We fix
-    // the categorization in RISCV::relocateAlloc.
     if (sym.isPreemptible) {
       sym.setFlags(NEEDS_TLSIE);
       sec->addReloc({ctx.target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE),



More information about the llvm-commits mailing list