[llvm] 1df5ea2 - [RISCV] Support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 for .uleb128 directives

Thu Nov 9 09:27:37 PST 2023

Author: Fangrui Song
Date: 2023-11-09T09:27:32-08:00
New Revision: 1df5ea29b43690b6622db2cad7b745607ca4de6a

URL: https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a
DIFF: https://github.com/llvm/llvm-project/commit/1df5ea29b43690b6622db2cad7b745607ca4de6a.diff

LOG: [RISCV] Support R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 for .uleb128 directives

For a label difference like `.uleb128 A-B`, MC folds A-B even if A and B
are separated by a RISC-V linker-relaxable instruction. This incorrect
behavior is currently abused by DWARF v5 .debug_loclists/.debug_rnglists
(DW_LLE_offset_pair/DW_RLE_offset_pair entry kinds) implemented in
Clang/LLVM (see https://github.com/ClangBuiltLinux/linux/issues/1719 for
an instance).

https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/96d6e190e9fc04a8517f9ff7fb9aed3e9876cbd6
defined R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128. This patch generates such
a pair of relocations to represent A-B that should not be folded.
GNU assembler computes the directive size by ignoring shrinkable section
content, therefore after linking the value of A-B cannot use more bytes
than the reserved number (`final size of uleb128 value at offset ... exceeds available space`).
We make the same assumption.
```
w1:
  call foo
w2:
  .space 120
w3:
.uleb128 w2-w1  # 1 byte, 0x08
.uleb128 w3-w1  # 2 bytes, 0x80 0x01
```

We do not conservatively reserve 10 bytes (maximum size of an uleb128
for uint64_t) as that would pessimize DWARF v5
DW_LLE_offset_pair/DW_RLE_offset_pair, nullifying the benefits of
introducing R_RISCV_SET_ULEB128/R_RISCV_SUB_ULEB128 relocations.

The supported expressions are limited. For example,

* non-subtraction `.uleb128 A` is not allowed
* `.uleb128 A-B`: report an error unless A and B are both defined and in the same section

The new cl::opt `-riscv-uleb128-reloc` can be used to suppress the
relocations.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D157657

Added: 
    llvm/test/MC/RISCV/leb128.s

Modified: 
    llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
    llvm/include/llvm/MC/MCAsmBackend.h
    llvm/include/llvm/MC/MCFixup.h
    llvm/include/llvm/MC/MCFragment.h
    llvm/lib/MC/MCAsmBackend.cpp
    llvm/lib/MC/MCAssembler.cpp
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
    llvm/test/MC/ELF/RISCV/gen-dwarf.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
index 9a126df01531195..c7fd6490041cd1d 100644

--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -55,3 +55,5 @@ ELF_RELOC(R_RISCV_SET32,             56)
 ELF_RELOC(R_RISCV_32_PCREL,          57)
 ELF_RELOC(R_RISCV_IRELATIVE,         58)
 ELF_RELOC(R_RISCV_PLT32,             59)
+ELF_RELOC(R_RISCV_SET_ULEB128,       60)
+ELF_RELOC(R_RISCV_SUB_ULEB128,       61)

diff  --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index b5c30aa756c2bfb..ebb33d0ab61f4a0 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -21,6 +21,7 @@ class MCAlignFragment;
 class MCDwarfCallFrameFragment;
 class MCDwarfLineAddrFragment;
 class MCFragment;
+class MCLEBFragment;
 class MCRelaxableFragment;
 class MCSymbol;
 class MCAsmLayout;
@@ -193,6 +194,13 @@ class MCAsmBackend {
     return false;
   }
 
+  // Defined by linker relaxation targets to possibly emit LEB128 relocations
+  // and set Value at the relocated location.
+  virtual bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+                           int64_t &Value) const {
+    return false;
+  }
+
   /// @}
 
   /// Returns the minimum size of a nop in bytes on this target. The assembler

diff  --git a/llvm/include/llvm/MC/MCFixup.h b/llvm/include/llvm/MC/MCFixup.h
index 919a00373ac2859..a0ed36d62ce9dce 100644
--- a/llvm/include/llvm/MC/MCFixup.h
+++ b/llvm/include/llvm/MC/MCFixup.h
@@ -24,6 +24,7 @@ enum MCFixupKind {
   FK_Data_2,      ///< A two-byte fixup.
   FK_Data_4,      ///< A four-byte fixup.
   FK_Data_8,      ///< A eight-byte fixup.
+  FK_Data_leb128, ///< A leb128 fixup.
   FK_PCRel_1,     ///< A one-byte pc relative fixup.
   FK_PCRel_2,     ///< A two-byte pc relative fixup.
   FK_PCRel_4,     ///< A four-byte pc relative fixup.

diff  --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index 7be4792a4521983..efe44b0b6917e10 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -428,7 +428,7 @@ class MCOrgFragment : public MCFragment {
   }
 };
 
-class MCLEBFragment : public MCFragment {
+class MCLEBFragment final : public MCEncodedFragmentWithFixups<10, 1> {
   /// True if this is a sleb128, false if uleb128.
   bool IsSigned;
 
@@ -438,18 +438,17 @@ class MCLEBFragment : public MCFragment {
   SmallString<8> Contents;
 
 public:
-  MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr)
-      : MCFragment(FT_LEB, false, Sec), IsSigned(IsSigned_), Value(&Value_) {
+  MCLEBFragment(const MCExpr &Value, bool IsSigned, MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithFixups<10, 1>(FT_LEB, false, Sec),
+        IsSigned(IsSigned), Value(&Value) {
     Contents.push_back(0);
   }
 
   const MCExpr &getValue() const { return *Value; }
+  void setValue(const MCExpr *Expr) { Value = Expr; }
 
   bool isSigned() const { return IsSigned; }
 
-  SmallString<8> &getContents() { return Contents; }
-  const SmallString<8> &getContents() const { return Contents; }
-
   /// @}
 
   static bool classof(const MCFragment *F) {

diff  --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index 4b1064a07e83c52..616576e945be49b 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -92,6 +92,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"FK_Data_2", 0, 16, 0},
       {"FK_Data_4", 0, 32, 0},
       {"FK_Data_8", 0, 64, 0},
+      {"FK_Data_leb128", 0, 0, 0},
       {"FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel},
       {"FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
       {"FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel},

diff  --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 55558820b670d91..901a66f156663f8 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -918,6 +918,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
         Contents = DF.getContents();
         break;
       }
+      case MCFragment::FT_LEB: {
+        auto &LF = cast<MCLEBFragment>(Frag);
+        Fixups = LF.getFixups();
+        Contents = LF.getContents();
+        break;
+      }
       case MCFragment::FT_PseudoProbe: {
         MCPseudoProbeAddrFragment &PF = cast<MCPseudoProbeAddrFragment>(Frag);
         Fixups = PF.getFixups();
@@ -1006,15 +1012,27 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
 }
 
 bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
-  uint64_t OldSize = LF.getContents().size();
+  const unsigned OldSize = static_cast<unsigned>(LF.getContents().size());
+  unsigned PadTo = OldSize;
   int64_t Value;
-  bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout);
+  SmallVectorImpl<char> &Data = LF.getContents();
+  LF.getFixups().clear();
+  // Use evaluateKnownAbsolute for Mach-O as a hack: .subsections_via_symbols
+  // requires that .uleb128 A-B is foldable where A and B reside in 
diff erent
+  // fragments. This is used by __gcc_except_table.
+  bool Abs = getSubsectionsViaSymbols()
+                 ? LF.getValue().evaluateKnownAbsolute(Value, Layout)
+                 : LF.getValue().evaluateAsAbsolute(Value, Layout);
   if (!Abs) {
-    getContext().reportError(LF.getValue().getLoc(),
-                             Twine(LF.isSigned() ? ".s" : ".u") +
-                                 "leb128 expression is not absolute");
+    if (!getBackend().relaxLEB128(LF, Layout, Value)) {
+      getContext().reportError(LF.getValue().getLoc(),
+                               Twine(LF.isSigned() ? ".s" : ".u") +
+                                   "leb128 expression is not absolute");
+      LF.setValue(MCConstantExpr::create(0, Context));
+    }
+    uint8_t Tmp[10]; // maximum size: ceil(64/7)
+    PadTo = std::max(PadTo, encodeULEB128(uint64_t(Value), Tmp));
   }
-  SmallString<8> &Data = LF.getContents();
   Data.clear();
   raw_svector_ostream OSE(Data);
   // The compiler can generate EH table assembly that is impossible to assemble
@@ -1022,9 +1040,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
   // to a later alignment fragment. To accommodate such tables, relaxation can
   // only increase an LEB fragment size here, not decrease it. See PR35809.
   if (LF.isSigned())
-    encodeSLEB128(Value, OSE, OldSize);
+    encodeSLEB128(Value, OSE, PadTo);
   else
-    encodeULEB128(Value, OSE, OldSize);
+    encodeULEB128(Value, OSE, PadTo);
   return OldSize != LF.getContents().size();
 }
 

diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 731c644198e418a..dfc3c9e9908d888 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -30,6 +30,12 @@ using namespace llvm;
 
 static cl::opt<bool> RelaxBranches("riscv-asm-relax-branches", cl::init(true),
                                    cl::Hidden);
+// Temporary workaround for old linkers that do not support ULEB128 relocations,
+// which are abused by DWARF v5 DW_LLE_offset_pair/DW_RLE_offset_pair
+// implemented in Clang/LLVM.
+static cl::opt<bool> ULEB128Reloc(
+    "riscv-uleb128-reloc", cl::init(true), cl::Hidden,
+    cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate"));
 
 std::optional<MCFixupKind> RISCVAsmBackend::getFixupKind(StringRef Name) const {
   if (STI.getTargetTriple().isOSBinFormatELF()) {
@@ -112,6 +118,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
   case FK_Data_2:
   case FK_Data_4:
   case FK_Data_8:
+  case FK_Data_leb128:
     if (Target.isAbsolute())
       return false;
     break;
@@ -321,6 +328,18 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF,
   return true;
 }
 
+bool RISCVAsmBackend::relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+                                  int64_t &Value) const {
+  if (LF.isSigned())
+    return false;
+  const MCExpr &Expr = LF.getValue();
+  if (ULEB128Reloc) {
+    LF.getFixups().push_back(
+        MCFixup::create(0, &Expr, FK_Data_leb128, Expr.getLoc()));
+  }
+  return Expr.evaluateKnownAbsolute(Value, Layout);
+}
+
 // Given a compressed control flow instruction this function returns
 // the expanded instruction.
 unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const {
@@ -395,6 +414,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
   case FK_Data_2:
   case FK_Data_4:
   case FK_Data_8:
+  case FK_Data_leb128:
     return Value;
   case RISCV::fixup_riscv_lo12_i:
   case RISCV::fixup_riscv_pcrel_lo12_i:
@@ -577,6 +597,10 @@ bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout,
     TA = ELF::R_RISCV_ADD64;
     TB = ELF::R_RISCV_SUB64;
     break;
+  case llvm::FK_Data_leb128:
+    TA = ELF::R_RISCV_SET_ULEB128;
+    TB = ELF::R_RISCV_SUB_ULEB128;
+    break;
   default:
     llvm_unreachable("unsupported fixup size");
   }

diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 95596ad5944c821..99b0d7b223b9937 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -99,6 +99,8 @@ class RISCVAsmBackend : public MCAsmBackend {
                           bool &WasRelaxed) const override;
   bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout,
                      bool &WasRelaxed) const override;
+  bool relaxLEB128(MCLEBFragment &LF, MCAsmLayout &Layout,
+                   int64_t &Value) const override;
 
   bool writeNopData(raw_ostream &OS, uint64_t Count,
                     const MCSubtargetInfo *STI) const override;

diff  --git a/llvm/test/MC/ELF/RISCV/gen-dwarf.s b/llvm/test/MC/ELF/RISCV/gen-dwarf.s
index 2235559d5f3575a..342ed1cc0e7ef9b 100644
--- a/llvm/test/MC/ELF/RISCV/gen-dwarf.s
+++ b/llvm/test/MC/ELF/RISCV/gen-dwarf.s
@@ -48,9 +48,10 @@
 # RELOC-NEXT:   0x34 R_RISCV_32_PCREL <null> 0x0
 # RELOC-NEXT: }
 
-## TODO A section needs two relocations.
 # RELOC:      Section ([[#]]) .rela.debug_rnglists {
 # RELOC-NEXT:   0xD R_RISCV_64 .text.foo 0x0
+# RELOC-NEXT:   0x15 R_RISCV_SET_ULEB128 <null> 0x0
+# RELOC-NEXT:   0x15 R_RISCV_SUB_ULEB128 .text.foo 0x0
 # RELOC-NEXT:   0x17 R_RISCV_64 .text.bar 0x0
 # RELOC-NEXT: }
 

diff  --git a/llvm/test/MC/RISCV/leb128.s b/llvm/test/MC/RISCV/leb128.s
new file mode 100644
index 000000000000000..429eac697182227
--- /dev/null
+++ b/llvm/test/MC/RISCV/leb128.s
@@ -0,0 +1,81 @@
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=-relax %s -o %t
+# RUN: llvm-readobj -r -x .alloc_w %t| FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax %s -o %t.relax
+# RUN: llvm-readobj -r -x .alloc_w %t.relax | FileCheck %s --check-prefixes=CHECK,RELAX
+
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=-relax %s -o %t
+# RUN: llvm-readobj -r -x .alloc_w %t | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax %s -o %t.relax
+# RUN: llvm-readobj -r -x .alloc_w %t.relax | FileCheck %s --check-prefixes=CHECK,RELAX
+
+## Test temporary workaround for suppressting relocations for actually-non-foldable
+## DWARF v5 DW_LLE_offset_pair/DW_RLE_offset_pair.
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=-relax -riscv-uleb128-reloc=0 %s -o %t0
+# RUN: llvm-readobj -r -x .alloc_w %t0 | FileCheck %s --check-prefix=CHECK0
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax -riscv-uleb128-reloc=0 %s -o %t0.relax
+# RUN: llvm-readobj -r -x .alloc_w %t0.relax | FileCheck %s --check-prefixes=CHECK0,RELAX0
+
+# RUN: not llvm-mc -filetype=obj -triple=riscv64 -mattr=-relax --defsym ERR=1 %s -o /dev/null 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR
+# RUN: not llvm-mc -filetype=obj -triple=riscv64 -mattr=+relax --defsym ERR=1 %s -o /dev/null 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR
+
+# CHECK0:      Relocations [
+# CHECK0-NEXT:   .rela.alloc_w {
+# CHECK0-NEXT:     0x2 R_RISCV_CALL_PLT foo 0x0
+# RELAX0-NEXT:     0x2 R_RISCV_RELAX - 0x0
+# CHECK0-NEXT:   }
+# CHECK0-NEXT: ]
+
+# CHECK:      Relocations [
+# CHECK-NEXT:   .rela.alloc_w {
+# CHECK-NEXT:     0x0 R_RISCV_SET_ULEB128 w1 0x0
+# CHECK-NEXT:     0x0 R_RISCV_SUB_ULEB128 w 0x0
+# RELAX-NEXT:     0x1 R_RISCV_SET_ULEB128 w2 0x0
+# RELAX-NEXT:     0x1 R_RISCV_SUB_ULEB128 w1 0x0
+# CHECK-NEXT:     0x2 R_RISCV_CALL_PLT foo 0x0
+# RELAX-NEXT:     0x2 R_RISCV_RELAX - 0x0
+# RELAX-NEXT:     0xA R_RISCV_SET_ULEB128 w2 0x0
+# RELAX-NEXT:     0xA R_RISCV_SUB_ULEB128 w1 0x0
+# RELAX-NEXT:     0xB R_RISCV_SET_ULEB128 w2 0x78
+# RELAX-NEXT:     0xB R_RISCV_SUB_ULEB128 w1 0x0
+# RELAX-NEXT:     0xD R_RISCV_SET_ULEB128 w1 0x0
+# RELAX-NEXT:     0xD R_RISCV_SUB_ULEB128 w2 0x0
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+## R_RISCV_SET_ULEB128 relocated locations contain values not accounting for linker relaxation.
+# CHECK:      Hex dump of section '.alloc_w':
+# CHECK-NEXT: 0x00000000 02089700 0000e780 00000880 01f8ffff ................
+# CHECK-NEXT: 0x00000010 ffffffff ffff01                     .......
+
+.section .alloc_w,"ax", at progbits; w:
+.uleb128 w1-w       # w1 is later defined in the same section
+.uleb128 w2-w1      # w1 and w2 are separated by a linker relaxable instruction
+w1:
+  call foo
+w2:
+.uleb128 w2-w1      # 0x08
+.uleb128 w2-w1+120  # 0x0180
+.uleb128 -(w2-w1)   # 0x01fffffffffffffffff8
+
+.ifdef ERR
+# ERR: :[[#@LINE+1]]:16: error: .uleb128 expression is not absolute
+.uleb128 extern-w   # extern is undefined
+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute
+.uleb128 w-extern
+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute
+.uleb128 x-w        # x is later defined in another section
+
+.section .alloc_x,"aw", at progbits; x:
+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute
+.uleb128 y-x
+.section .alloc_y,"aw", at progbits; y:
+# ERR: :[[#@LINE+1]]:11: error: .uleb128 expression is not absolute
+.uleb128 x-y
+
+# ERR: :[[#@LINE+1]]:10: error: .uleb128 expression is not absolute
+.uleb128 extern
+# ERR: :[[#@LINE+1]]:10: error: .uleb128 expression is not absolute
+.uleb128 y
+.endif