[lld] [ELF] Support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 in non-SHF_ALLOC sections (PR #72610)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 16 22:45:41 PST 2023


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/72610

>From 6240191a20e75823a17275508393f86dc336d99f Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 16 Nov 2023 19:03:38 -0800
Subject: [PATCH] [ELF] Support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 in
 non-SHF_ALLOC sections

For a label difference like `.uleb128 A-B`, MC generates a pair of
R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 relocations if A-B cannot be
folded into a constant. GNU assembler generates the relocation pair in more
cases (when A or B is in a code section with linker relaxation).

`.uleb128 A-B` is primarily used by DWARF v5
.debug_loclists/.debug_rnglists (DW_LLE_offset_pair/DW_RLE_offset_pair
entry kinds) implemented in Clang and GCC.

`.uleb128 A-B` can be used in SHF_ALLOC sections as well (e.g.
`.gcc_except_table`). This patch does not handle SHF_ALLOC sections.

`-z dead-reloc-in-nonalloc=` can be used to change the relocated value
if the R_RISCV_SET_ULEB128 symbol is in a discarded section. We don't
check the R_RISCV_SUB_ULEB128 symbol because, in the expected cases, A and
B should be defined in the same input section.
---
 lld/ELF/Arch/RISCV.cpp            |   2 +
 lld/ELF/InputSection.cpp          |  42 +++++++++-
 lld/ELF/Relocations.h             |   1 +
 lld/test/ELF/riscv-reloc-leb128.s | 129 ++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+), 3 deletions(-)
 create mode 100644 lld/test/ELF/riscv-reloc-leb128.s

diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 6413dcd7dcd7976..a556d89c36400d3 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -306,6 +306,8 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
   case R_RISCV_TPREL_ADD:
   case R_RISCV_RELAX:
     return config->relax ? R_RELAX_HINT : R_NONE;
+  case R_RISCV_SET_ULEB128:
+    return R_RISCV_LEB128;
   default:
     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
           ") against symbol " + toString(s));
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index e6942a928787a5b..49701dd34cf3d90 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/xxhash.h"
 #include <algorithm>
 #include <mutex>
@@ -874,6 +875,16 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
   }
 }
 
+// Overwrite the ULEB128 at bufLoc with val, keeping the encoded length.
+static uint64_t overwriteULEB128(uint8_t *bufLoc, uint64_t val) {
+  while (*bufLoc & 0x80) { // Continuation bit set: more bytes follow.
+    *bufLoc++ = 0x80 | (val & 0x7f);
+    val >>= 7;
+  }
+  *bufLoc = val; // Last byte; the caller treats a result >= 0x80 as overflow.
+  return val;
+}
+
 // This function applies relocations to sections without SHF_ALLOC bit.
 // Such sections are never mapped to memory at runtime. Debug sections are
 // an example. Relocations in non-alloc sections are much easier to
@@ -885,6 +896,7 @@ template <class ELFT, class RelTy>
 void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
   const unsigned bits = sizeof(typename ELFT::uint) * 8;
   const TargetInfo &target = *elf::target;
+  const auto emachine = config->emachine;
   const bool isDebug = isDebugSection(*this);
   const bool isDebugLocOrRanges =
       isDebug && (name == ".debug_loc" || name == ".debug_ranges");
@@ -896,14 +908,15 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
       break;
     }
 
-  for (const RelTy &rel : rels) {
+  for (size_t i = 0, relsSize = rels.size(); i != relsSize; ++i) {
+    const RelTy &rel = rels[i];
     RelType type = rel.getType(config->isMips64EL);
 
     // GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations
     // against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed
     // in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we
     // need to keep this bug-compatible code for a while.
-    if (config->emachine == EM_386 && type == R_386_GOTPC)
+    if (emachine == EM_386 && type == R_386_GOTPC)
       continue;
 
     uint64_t offset = rel.r_offset;
@@ -916,6 +929,30 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
     RelExpr expr = target.getRelExpr(type, sym, bufLoc);
     if (expr == R_NONE)
       continue;
+    auto *ds = dyn_cast<Defined>(&sym);
+
+    if (emachine == EM_RISCV && type == R_RISCV_SET_ULEB128) {
+      if (++i < relsSize &&
+          rels[i].getType(/*isMips64EL=*/false) == R_RISCV_SUB_ULEB128 &&
+          rels[i].r_offset == offset) {
+        uint64_t val;
+        if (!ds && tombstone) {
+          val = *tombstone;
+        } else {
+          val = sym.getVA(addend) -
+                (getFile<ELFT>()->getRelocTargetSym(rels[i]).getVA(0) +
+                 getAddend<ELFT>(rels[i]));
+        }
+        if (overwriteULEB128(bufLoc, val) >= 0x80)
+          errorOrWarn(getLocation(offset) + ": ULEB128 value " + Twine(val) +
+                      " exceeds available space; references '" +
+                      lld::toString(sym) + "'");
+        continue;
+      }
+      errorOrWarn(getLocation(offset) +
+                  ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128");
+      return;
+    }
 
     if (tombstone ||
         (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) {
@@ -947,7 +984,6 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
       //
       // TODO To reduce disruption, we use 0 instead of -1 as the tombstone
       // value. Enable -1 in a future release.
-      auto *ds = dyn_cast<Defined>(&sym);
       if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) {
         // If -z dead-reloc-in-nonalloc= is specified, respect it.
         const uint64_t value = tombstone ? SignExtend64<bits>(*tombstone)
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 15a2b5fc177c546..cfb9092149f3e0f 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -101,6 +101,7 @@ enum RelExpr {
   R_PPC64_TOCBASE,
   R_PPC64_RELAX_GOT_PC,
   R_RISCV_ADD,
+  R_RISCV_LEB128,
   R_RISCV_PC_INDIRECT,
   // Same as R_PC but with page-aligned semantics.
   R_LOONGARCH_PAGE_PC,
diff --git a/lld/test/ELF/riscv-reloc-leb128.s b/lld/test/ELF/riscv-reloc-leb128.s
new file mode 100644
index 000000000000000..8198819686c3c86
--- /dev/null
+++ b/lld/test/ELF/riscv-reloc-leb128.s
@@ -0,0 +1,129 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax a.s -o a.o
+# RUN: llvm-readobj -r -x .debug_rnglists -x .debug_loclists a.o | FileCheck %s --check-prefix=REL
+# RUN: ld.lld -shared --gc-sections a.o -o a.so
+# RUN: llvm-readelf -x .debug_rnglists -x .debug_loclists a.so | FileCheck %s
+
+# REL:      .rela.debug_rnglists {
+# REL-NEXT:   0x0 R_RISCV_SET_ULEB128 w1 0x83
+# REL-NEXT:   0x0 R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT:   0x1 R_RISCV_SET_ULEB128 w2 0x78
+# REL-NEXT:   0x1 R_RISCV_SUB_ULEB128 w1 0x0
+# REL-NEXT:   0x3 R_RISCV_SET_ULEB128 w1 0x89
+# REL-NEXT:   0x3 R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT:   0x5 R_RISCV_SET_ULEB128 w2 0x3FF8
+# REL-NEXT:   0x5 R_RISCV_SUB_ULEB128 w1 0x0
+# REL-NEXT:   0x8 R_RISCV_SET_ULEB128 w1 0x4009
+# REL-NEXT:   0x8 R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT:   0xB R_RISCV_SET_ULEB128 w2 0x1FFFF8
+# REL-NEXT:   0xB R_RISCV_SUB_ULEB128 w1 0x0
+# REL-NEXT:   0xF R_RISCV_SET_ULEB128 w1 0x200009
+# REL-NEXT:   0xF R_RISCV_SUB_ULEB128 w2 0x0
+# REL-NEXT: }
+# REL:      .rela.debug_loclists {
+# REL-NEXT:   0x0 R_RISCV_SET_ULEB128 w2 0x3
+# REL-NEXT:   0x0 R_RISCV_SUB_ULEB128 w1 0x4
+# REL-NEXT:   0x1 R_RISCV_SET_ULEB128 x2 0x0
+# REL-NEXT:   0x1 R_RISCV_SUB_ULEB128 x1 0x0
+# REL-NEXT: }
+
+# REL:        Hex dump of section '.debug_rnglists':
+# REL-NEXT:   0x00000000 7b800181 01808001 81800180 80800181 {
+# REL-NEXT:   0x00000010 808001                              .
+# REL:        Hex dump of section '.debug_loclists':
+# REL-NEXT:   0x00000000 0008                                  .
+
+# CHECK:      Hex dump of section '.debug_rnglists':
+# CHECK-NEXT: 0x00000000 7ffc0085 01fcff00 858001fc ffff0085 .
+# CHECK-NEXT: 0x00000010 808001                              .
+# CHECK:      Hex dump of section '.debug_loclists':
+# CHECK-NEXT: 0x00000000 0300                                .
+
+# RUN: ld.lld -shared --gc-sections -z dead-reloc-in-nonalloc=.debug_loclists=0x7f a.o -o a127.so
+# RUN: llvm-readelf -x .debug_loclists a127.so | FileCheck %s --check-prefix=CHECK127
+# CHECK127:      Hex dump of section '.debug_loclists':
+# CHECK127-NEXT: 0x00000000 037f                                .
+
+# RUN: not ld.lld -shared --gc-sections -z dead-reloc-in-nonalloc=.debug_loclists=0x80 a.o 2>&1 | FileCheck %s --check-prefix=CHECK128
+# CHECK128: error: a.o:(.debug_loclists+0x1): ULEB128 value 128 exceeds available space; references 'x2'
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax sub.s -o sub.o
+# RUN: not ld.lld -shared sub.o 2>&1 | FileCheck %s --check-prefix=SUB
+# SUB: error: sub.o:(.debug_rnglists+0x8): unknown relocation (61) against symbol w2
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax unpaired1.s -o unpaired1.o
+# RUN: not ld.lld -shared unpaired1.o 2>&1 | FileCheck %s --check-prefix=UNPAIRED
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax unpaired2.s -o unpaired2.o
+# RUN: not ld.lld -shared unpaired2.o 2>&1 | FileCheck %s --check-prefix=UNPAIRED
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax unpaired3.s -o unpaired3.o
+# RUN: not ld.lld -shared unpaired3.o 2>&1 | FileCheck %s --check-prefix=UNPAIRED
+# UNPAIRED: error: {{.*}}.o:(.debug_rnglists+0x8): R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax overflow.s -o overflow.o
+# RUN: not ld.lld -shared overflow.o 2>&1 | FileCheck %s --check-prefix=OVERFLOW
+# OVERFLOW: error: overflow.o:(.debug_rnglists+0x8): ULEB128 value 128 exceeds available space; references 'w2'
+
+#--- a.s
+.section .text.w,"axR"
+w1:
+  call foo    # 4 bytes after relaxation
+w2:
+
+.section .text.x,"ax"
+x1:
+  call foo    # 4 bytes after relaxation
+x2:
+
+.section .debug_rnglists
+.uleb128 w1-w2+131                   # initial value: 0x7b
+.uleb128 w2-w1+120                   # initial value: 0x0180
+.uleb128 w1-w2+137                   # initial value: 0x0181
+.uleb128 w2-w1+16376                 # initial value: 0x018080
+.uleb128 w1-w2+16393                 # initial value: 0x018081
+.uleb128 w2-w1+2097144               # initial value: 0x01808080
+.uleb128 w1-w2+2097161               # initial value: 0x01808081
+
+.section .debug_loclists
+.reloc ., R_RISCV_SET_ULEB128, w2+3
+.reloc ., R_RISCV_SUB_ULEB128, w1+4  # SUB with a non-zero addend
+.byte 0
+.uleb128 x2-x1                       # references discarded symbols
+
+#--- sub.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0;
+.reloc ., R_RISCV_SUB_ULEB128, w2+120
+.byte 0x7f
+
+#--- unpaired1.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0;
+.reloc ., R_RISCV_SET_ULEB128, w2+120
+.byte 0x7f
+
+#--- unpaired2.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0
+.reloc ., R_RISCV_SET_ULEB128, w2+120
+.reloc .+1, R_RISCV_SUB_ULEB128, w1
+.byte 0x7f
+
+#--- unpaired3.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0
+.reloc ., R_RISCV_SET_ULEB128, w2+120
+.reloc ., R_RISCV_SUB64, w1
+.byte 0x7f
+
+#--- overflow.s
+w1: call foo; w2:
+.section .debug_rnglists
+.quad 0
+.reloc ., R_RISCV_SET_ULEB128, w2+124
+.reloc ., R_RISCV_SUB_ULEB128, w1
+.byte 0x7f



More information about the llvm-commits mailing list