[llvm-branch-commits] [llvm-objcopy] Support CREL (PR #97521)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jul 2 22:46:18 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-binary-utilities

@llvm/pr-subscribers-mc

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

llvm-objcopy may modify the symbol table and need to rewrite
relocations. For CREL, while we can reuse the decoder from #<!-- -->91280, we
need an encoder to support CREL.

Since MC/ELFObjectWriter.cpp has an existing encoder, and MC is at a
lower layer than Object, extract the encoder to a new header file
llvm/MC/MCELFExtras.h.

Link: https://discourse.llvm.org/t/rfc-crel-a-compact-relocation-format-for-elf/77600


---
Full diff: https://github.com/llvm/llvm-project/pull/97521.diff


7 Files Affected:

- (modified) llvm/include/llvm/BinaryFormat/ELF.h (+9) 
- (added) llvm/include/llvm/MC/MCELFExtras.h (+60) 
- (modified) llvm/lib/MC/ELFObjectWriter.cpp (+10-39) 
- (modified) llvm/lib/ObjCopy/ELF/ELFObject.cpp (+40-7) 
- (modified) llvm/lib/ObjCopy/ELF/ELFObject.h (+3-2) 
- (added) llvm/test/tools/llvm-objcopy/ELF/crel.test (+140) 
- (modified) llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test (+3-1) 


``````````diff
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 32cc9ff8cbb78..456cffff6b4a7 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/StringRef.h"
 #include <cstdint>
 #include <cstring>
+#include <type_traits>
 
 namespace llvm {
 namespace ELF {
@@ -1430,6 +1431,14 @@ struct Elf64_Rela {
   }
 };
 
+// In-memory representation of CREL. The serialized representation uses LEB128.
+template <bool Is64> struct Elf_Crel {
+  std::conditional_t<Is64, uint64_t, uint32_t> r_offset;
+  uint32_t r_symidx;
+  uint32_t r_type;
+  std::conditional_t<Is64, int64_t, int32_t> r_addend;
+};
+
 // Relocation entry without explicit addend or info (relative relocations only).
 typedef Elf64_Xword Elf64_Relr; // offset/bitmap for relative relocations
 
diff --git a/llvm/include/llvm/MC/MCELFExtras.h b/llvm/include/llvm/MC/MCELFExtras.h
new file mode 100644
index 0000000000000..81c5ddf02dbc5
--- /dev/null
+++ b/llvm/include/llvm/MC/MCELFExtras.h
@@ -0,0 +1,60 @@
+//===- MCELFExtras.h - Extra functions for ELF ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFEXTRAS_H
+#define LLVM_MC_MCELFEXTRAS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdint>
+#include <type_traits>
+
+namespace llvm::ELF {
+template <bool Is64, class RelocsTy, class F>
+void encodeCrel(raw_ostream &OS, RelocsTy Relocs, F ToCrel) {
+  using uint = std::conditional_t<Is64, uint64_t, uint32_t>;
+  uint OffsetMask = 8, Offset = 0, Addend = 0;
+  uint32_t SymIdx = 0, Type = 0;
+  for (const auto &R : Relocs)
+    OffsetMask |= ToCrel(R).r_offset;
+  const int Shift = llvm::countr_zero(OffsetMask);
+  encodeULEB128(Relocs.size() * 8 + ELF::CREL_HDR_ADDEND + Shift, OS);
+  for (const auto &R : Relocs) {
+    auto CR = ToCrel(R);
+    auto DeltaOffset = static_cast<uint>((CR.r_offset - Offset) >> Shift);
+    Offset = CR.r_offset;
+    uint8_t B = (DeltaOffset << 3) + (SymIdx != CR.r_symidx) +
+                (Type != CR.r_type ? 2 : 0) +
+                (Addend != uint(CR.r_addend) ? 4 : 0);
+    if (DeltaOffset < 0x10) {
+      OS << char(B);
+    } else {
+      OS << char(B | 0x80);
+      encodeULEB128(DeltaOffset >> 4, OS);
+    }
+    // Delta symidx/type/addend members (SLEB128).
+    if (B & 1) {
+      encodeSLEB128(static_cast<int32_t>(CR.r_symidx - SymIdx), OS);
+      SymIdx = CR.r_symidx;
+    }
+    if (B & 2) {
+      encodeSLEB128(static_cast<int32_t>(CR.r_type - Type), OS);
+      Type = CR.r_type;
+    }
+    if (B & 4) {
+      encodeSLEB128(std::make_signed_t<uint>(CR.r_addend - Addend), OS);
+      Addend = CR.r_addend;
+    }
+  }
+}
+} // namespace llvm::ELF
+
+#endif
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index bcc6dfeeeccd6..771afff09ec5d 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFExtras.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
@@ -919,44 +920,14 @@ void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
   WriteWord(EntrySize); // sh_entsize
 }
 
-template <class uint>
+template <bool Is64>
 static void encodeCrel(ArrayRef<ELFRelocationEntry> Relocs, raw_ostream &OS) {
-  uint OffsetMask = 8, Offset = 0, Addend = 0;
-  uint32_t SymIdx = 0, Type = 0;
-  // hdr & 4 indicates 3 flag bits in delta offset and flags members.
-  for (const ELFRelocationEntry &Entry : Relocs)
-    OffsetMask |= Entry.Offset;
-  const int Shift = llvm::countr_zero(OffsetMask);
-  encodeULEB128(Relocs.size() * 8 + ELF::CREL_HDR_ADDEND + Shift, OS);
-  for (const ELFRelocationEntry &Entry : Relocs) {
-    // The delta offset and flags member may be larger than uint64_t. Special
-    // case the first byte (3 flag bits and 4 offset bits). Other ULEB128 bytes
-    // encode the remaining delta offset bits.
-    auto DeltaOffset = static_cast<uint>((Entry.Offset - Offset) >> Shift);
-    Offset = Entry.Offset;
-    uint32_t CurSymIdx = Entry.Symbol ? Entry.Symbol->getIndex() : 0;
-    uint8_t B = (DeltaOffset << 3) + (SymIdx != CurSymIdx) +
-                (Type != Entry.Type ? 2 : 0) + (Addend != Entry.Addend ? 4 : 0);
-    if (DeltaOffset < 0x10) {
-      OS << char(B);
-    } else {
-      OS << char(B | 0x80);
-      encodeULEB128(DeltaOffset >> 4, OS);
-    }
-    // Delta symidx/type/addend members (SLEB128).
-    if (B & 1) {
-      encodeSLEB128(static_cast<int32_t>(CurSymIdx - SymIdx), OS);
-      SymIdx = CurSymIdx;
-    }
-    if (B & 2) {
-      encodeSLEB128(static_cast<int32_t>(Entry.Type - Type), OS);
-      Type = Entry.Type;
-    }
-    if (B & 4) {
-      encodeSLEB128(std::make_signed_t<uint>(Entry.Addend - Addend), OS);
-      Addend = Entry.Addend;
-    }
-  }
+  using uint = std::conditional_t<Is64, uint64_t, uint32_t>;
+  ELF::encodeCrel<Is64>(OS, Relocs, [&](const ELFRelocationEntry &R) {
+    uint32_t SymIdx = R.Symbol ? R.Symbol->getIndex() : 0;
+    return ELF::Elf_Crel<Is64>{static_cast<uint>(R.Offset), SymIdx, R.Type,
+                               std::make_signed_t<uint>(R.Addend)};
+  });
 }
 
 void ELFWriter::writeRelocations(const MCAssembler &Asm,
@@ -1005,9 +976,9 @@ void ELFWriter::writeRelocations(const MCAssembler &Asm,
     }
   } else if (TO && TO->Crel) {
     if (is64Bit())
-      encodeCrel<uint64_t>(Relocs, W.OS);
+      encodeCrel<true>(Relocs, W.OS);
     else
-      encodeCrel<uint32_t>(Relocs, W.OS);
+      encodeCrel<false>(Relocs, W.OS);
   } else {
     for (const ELFRelocationEntry &Entry : Relocs) {
       uint32_t Symidx = Entry.Symbol ? Entry.Symbol->getIndex() : 0;
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 02591e6f987c2..b23df17fb8a62 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCELFExtras.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Object/ELF.h"
 #include "llvm/Object/ELFObjectFile.h"
@@ -107,12 +108,29 @@ Error ELFSectionSizer<ELFT>::visit(SymbolTableSection &Sec) {
   return Error::success();
 }
 
+template <bool Is64>
+static SmallVector<char, 0> encodeCrel(ArrayRef<Relocation> Relocations) {
+  using uint = std::conditional_t<Is64, uint64_t, uint32_t>;
+  SmallVector<char, 0> Content;
+  raw_svector_ostream OS(Content);
+  ELF::encodeCrel<Is64>(OS, Relocations, [&](const Relocation &R) {
+    uint32_t CurSymIdx = R.RelocSymbol ? R.RelocSymbol->Index : 0;
+    return ELF::Elf_Crel<Is64>{static_cast<uint>(R.Offset), CurSymIdx, R.Type,
+                               std::make_signed_t<uint>(R.Addend)};
+  });
+  return Content;
+}
+
 template <class ELFT>
 Error ELFSectionSizer<ELFT>::visit(RelocationSection &Sec) {
-  Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
-  Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
-  // Align to the largest field in Elf_Rel(a).
-  Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+  if (Sec.Type == SHT_CREL) {
+    Sec.Size = encodeCrel<ELFT::Is64Bits>(Sec.Relocations).size();
+  } else {
+    Sec.EntrySize = Sec.Type == SHT_REL ? sizeof(Elf_Rel) : sizeof(Elf_Rela);
+    Sec.Size = Sec.Relocations.size() * Sec.EntrySize;
+    // Align to the largest field in Elf_Rel(a).
+    Sec.Align = ELFT::Is64Bits ? sizeof(Elf_Xword) : sizeof(Elf_Word);
+  }
   return Error::success();
 }
 
@@ -874,6 +892,8 @@ StringRef RelocationSectionBase::getNamePrefix() const {
     return ".rel";
   case SHT_RELA:
     return ".rela";
+  case SHT_CREL:
+    return ".crel";
   default:
     llvm_unreachable("not a relocation section");
   }
@@ -966,12 +986,16 @@ static void writeRel(const RelRange &Relocations, T *Buf, bool IsMips64EL) {
 template <class ELFT>
 Error ELFSectionWriter<ELFT>::visit(const RelocationSection &Sec) {
   uint8_t *Buf = reinterpret_cast<uint8_t *>(Out.getBufferStart()) + Sec.Offset;
-  if (Sec.Type == SHT_REL)
+  if (Sec.Type == SHT_CREL) {
+    auto Content = encodeCrel<ELFT::Is64Bits>(Sec.Relocations);
+    memcpy(Buf, Content.data(), Content.size());
+  } else if (Sec.Type == SHT_REL) {
     writeRel(Sec.Relocations, reinterpret_cast<Elf_Rel *>(Buf),
              Sec.getObject().IsMips64EL);
-  else
+  } else {
     writeRel(Sec.Relocations, reinterpret_cast<Elf_Rela *>(Buf),
              Sec.getObject().IsMips64EL);
+  }
   return Error::success();
 }
 
@@ -1684,6 +1708,7 @@ Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
   switch (Shdr.sh_type) {
   case SHT_REL:
   case SHT_RELA:
+  case SHT_CREL:
     if (Shdr.sh_flags & SHF_ALLOC) {
       if (Expected<ArrayRef<uint8_t>> Data = ElfFile.getSectionContents(Shdr))
         return Obj.addSection<DynamicRelocationSection>(*Data);
@@ -1861,7 +1886,15 @@ template <class ELFT> Error ELFBuilder<ELFT>::readSections(bool EnsureSymtab) {
 
       const typename ELFFile<ELFT>::Elf_Shdr *Shdr =
           Sections->begin() + RelSec->Index;
-      if (RelSec->Type == SHT_REL) {
+      if (RelSec->Type == SHT_CREL) {
+        auto Rels = ElfFile.crels(*Shdr);
+        if (!Rels)
+          return Rels.takeError();
+        if (Error Err = initRelocations(RelSec, Rels->first))
+          return Err;
+        if (Error Err = initRelocations(RelSec, Rels->second))
+          return Err;
+      } else if (RelSec->Type == SHT_REL) {
         Expected<typename ELFFile<ELFT>::Elf_Rel_Range> Rels =
             ElfFile.rels(*Shdr);
         if (!Rels)
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 2b1895a30b41e..e3c0e7abda16b 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -881,7 +881,8 @@ class RelocationSectionBase : public SectionBase {
   StringRef getNamePrefix() const;
 
   static bool classof(const SectionBase *S) {
-    return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
+    return is_contained({ELF::SHT_REL, ELF::SHT_RELA, ELF::SHT_CREL},
+                        S->OriginalType);
   }
 };
 
@@ -925,7 +926,7 @@ class RelocationSection
   static bool classof(const SectionBase *S) {
     if (S->OriginalFlags & ELF::SHF_ALLOC)
       return false;
-    return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
+    return RelocationSectionBase::classof(S);
   }
 };
 
diff --git a/llvm/test/tools/llvm-objcopy/ELF/crel.test b/llvm/test/tools/llvm-objcopy/ELF/crel.test
new file mode 100644
index 0000000000000..3c6c708d4729f
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/crel.test
@@ -0,0 +1,140 @@
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objcopy --remove-section=.foo --strip-symbol=unused %t %t.out
+# RUN: llvm-readelf -Sr %t.out | FileCheck %s
+
+# CHECK:      [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# CHECK-NEXT: [ 0]                   NULL            0000000000000000 000000 000000 00      0   0  0
+# CHECK-NEXT: [ 1] .text             PROGBITS        0000000000000000 {{.*}} 000008 00   A  0   0  0
+# CHECK-NEXT: [ 2] .crel.text        CREL            0000000000000000 {{.*}} 000022 00      5   1  0
+# CHECK-NEXT: [ 3] nonalloc          PROGBITS        0000000000000000 {{.*}} 000030 00      0   0  0
+# CHECK-NEXT: [ 4] .crelnonalloc     CREL            0000000000000000 {{.*}} 00000b 00      5   3  0
+
+# CHECK:      Relocation section '.crel.text' at offset {{.*}} contains 4 entries:
+# CHECK-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# CHECK-NEXT: 0000000000000001  {{.*}}           R_X86_64_32            0000000000000000 g1 + 1
+# CHECK-NEXT: 0000000000000002  {{.*}}           R_X86_64_64            0000000000000000 l1 + 2
+# CHECK-NEXT: 0000000000000000  {{.*}}           R_X86_64_32S           0000000000000000 g1 - 1
+# CHECK-NEXT: 0000000000000004  {{.*}}           R_X86_64_32S           0000000000000000 .text - 8000000000000000
+# CHECK-EMPTY:
+# CHECK-NEXT: Relocation section '.crelnonalloc' at offset {{.*}} contains 3 entries:
+# CHECK-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# CHECK-NEXT: 0000000000000010  {{.*}}           R_X86_64_64            0000000000000000 g1 + 1
+# CHECK-NEXT: 0000000000000020  {{.*}}           R_X86_64_64            0000000000000000 g2 + 2
+# CHECK-NEXT: 0000000000000030  {{.*}}           R_X86_64_64                       0
+
+--- !ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+
+Sections:
+- Name: .foo
+  Type: SHT_PROGBITS
+  Flags: [SHF_ALLOC]
+- Name: .text
+  Type: SHT_PROGBITS
+  Content: "0000000000000000"
+  Flags: [SHF_ALLOC]
+- Name: .crel.text
+  Type: SHT_CREL
+  Info: .text
+  Link: .symtab
+  Relocations:
+    - Offset: 0x1
+      Symbol: g1
+      Type:   R_X86_64_32
+      Addend: 1
+    - Offset: 0x2
+      Symbol: l1
+      Type:   R_X86_64_64
+      Addend: 2
+    - Offset: 0x0
+      Symbol: g1
+      Type:   R_X86_64_32S
+      Addend: 0xffffffffffffffff
+    - Offset: 0x4
+      Symbol: .text
+      Type:   R_X86_64_32S
+      Addend: 0x8000000000000000
+- Name: nonalloc
+  Type: SHT_PROGBITS
+  Size: 0x30
+- Name: .crelnonalloc
+  Type: SHT_CREL
+  Info: nonalloc
+  Link: .symtab
+  Relocations:
+    - Offset: 0x10
+      Symbol: g1
+      Type:   R_X86_64_64
+      Addend: 1
+    - Offset: 0x20
+      Symbol: g2
+      Type:   R_X86_64_64
+      Addend: 2
+    - Offset: 0x30
+      Symbol: 0
+      Type:   R_X86_64_64
+
+Symbols:
+  - Name: unused
+    Section: .text
+  - Name: .text
+    Type: STT_SECTION
+    Section: .text
+  - Name:    l1
+  - Name:    g1
+    Section: .text
+    Value:   0x0
+    Size:    4
+    Binding: STB_GLOBAL
+  - Name:    g2
+    Binding: STB_GLOBAL
+
+# RUN: yaml2obj --docnum=2 %s -o %t.32
+# RUN: llvm-objcopy %t.32 %t.32.out
+# RUN: llvm-readobj -r %t.32.out | FileCheck %s --check-prefix=CHECK2
+
+# CHECK2:      Relocations [
+# CHECK2-NEXT:   Section (2) .crel.text {
+# CHECK2-NEXT:     0x0 R_X86_64_32S g1 0xFFFFFFFF
+# CHECK2-NEXT:     0x4 R_X86_64_32S .text 0x80000000
+# CHECK2-NEXT:   }
+# CHECK2-NEXT: ]
+
+--- !ELF
+FileHeader: !FileHeader
+  Class: ELFCLASS32
+  Data: ELFDATA2LSB
+  Type: ET_REL
+  Machine: EM_X86_64
+
+Sections:
+- Name: .text
+  Type: SHT_PROGBITS
+  Content: "0000000000000000"
+  Flags: [SHF_ALLOC]
+- Name: .crel.text
+  Type: SHT_CREL
+  Info: .text
+  Link: .symtab
+  Relocations:
+    - Offset: 0x0
+      Symbol: g1
+      Type:   R_X86_64_32S
+      Addend: 0xffffffff
+    - Offset: 0x4
+      Symbol: .text
+      Type:   R_X86_64_32S
+      Addend: 0x80000000
+
+Symbols:
+  - Name: .text
+    Type: STT_SECTION
+    Section: .text
+  - Name:    g1
+    Section: .text
+    Size:    4
+    Binding: STB_GLOBAL
diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test
index 941dacce2edf2..7f2cd726b15c8 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test
@@ -1,5 +1,7 @@
 # RUN: yaml2obj %s -o %t
 # RUN: not llvm-objcopy -N foo %t %t2 2>&1 | FileCheck %s -DFILE=%t
+# RUN: yaml2obj -DTYPE=SHT_CREL %s -o %t1
+# RUN: not llvm-objcopy -N foo %t1 /dev/null 2>&1 | FileCheck %s -DFILE=%t1
 
 !ELF
 FileHeader:
@@ -15,7 +17,7 @@ Sections:
     AddressAlign:    0x0000000000000010
     Size:            64
   - Name:            .rel.text
-    Type:            SHT_REL
+    Type:            [[TYPE=SHT_REL]]
     Info:            .text
     Relocations:
       - Offset: 0x1000

``````````

</details>


https://github.com/llvm/llvm-project/pull/97521


More information about the llvm-branch-commits mailing list