[lld] r267164 - GC entries of SHF_MERGE sections.

Rafael Espíndola via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 22 12:22:20 PDT 2016


Debugging.

Do you want me to revert for now?

Cheers,
Rafael


On 22 April 2016 at 15:14, Peter Collingbourne <peter at pcc.me.uk> wrote:
> This change appears to break the bootstrap when using the steps given in
> PR27482.
>
> I get:
>
> ld.lld: ../lib/MC/StringTableBuilder.cpp:180: size_t
> llvm::StringTableBuilder::getOffset(llvm::StringRef) const: Assertion `I !=
> StringIndexMap.end() && "String is not in table!"' failed.
>
> when linking llvm-profdata during stage 2.
>
> Peter
>
> On Fri, Apr 22, 2016 at 9:46 AM, Rafael Espindola via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>>
>> Author: rafael
>> Date: Fri Apr 22 11:46:08 2016
>> New Revision: 267164
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=267164&view=rev
>> Log:
>> GC entries of SHF_MERGE sections.
>>
>> It is a fairly direct extension of the gc algorithm. For merge sections
>> instead of remembering just a live bit, we remember which offsets were
>> used.
>>
>> This reduces the .rodata sections in chromium from 9648861 to 9477472
>> bytes.
>>
>> Added:
>>     lld/trunk/test/ELF/gc-sections-merge-addend.s
>>     lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s
>>     lld/trunk/test/ELF/gc-sections-merge.s
>> Modified:
>>     lld/trunk/ELF/InputSection.cpp
>>     lld/trunk/ELF/InputSection.h
>>     lld/trunk/ELF/MarkLive.cpp
>>     lld/trunk/ELF/OutputSections.cpp
>>
>> Modified: lld/trunk/ELF/InputSection.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=267164&r1=267163&r2=267164&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/InputSection.cpp (original)
>> +++ lld/trunk/ELF/InputSection.cpp Fri Apr 22 11:46:08 2016
>> @@ -77,23 +77,41 @@ InputSectionBase<ELFT>::getOffset(const
>>    return getOffset(Sym.Value);
>>  }
>>
>> -// Returns a section that Rel relocation is pointing to.
>>  template <class ELFT>
>> -InputSectionBase<ELFT> *
>> -InputSectionBase<ELFT>::getRelocTarget(const Elf_Rel &Rel) const {
>> -  // Global symbol
>> +static DefinedRegular<ELFT> *getRelocTargetSym(ObjectFile<ELFT> *File,
>> +                                               const typename ELFT::Rel
>> &Rel) {
>>    uint32_t SymIndex = Rel.getSymbol(Config->Mips64EL);
>>    SymbolBody &B = File->getSymbolBody(SymIndex).repl();
>>    if (auto *D = dyn_cast<DefinedRegular<ELFT>>(&B))
>>      if (D->Section)
>> -      return D->Section->Repl;
>> +      return D;
>>    return nullptr;
>>  }
>>
>> +// Returns a section that Rel relocation is pointing to.
>> +template <class ELFT>
>> +std::pair<InputSectionBase<ELFT> *, typename ELFT::uint>
>> +InputSectionBase<ELFT>::getRelocTarget(const Elf_Rel &Rel) const {
>> +  auto *D = getRelocTargetSym(File, Rel);
>> +  if (!D)
>> +    return std::make_pair(nullptr, 0);
>> +  if (!D->isSection())
>> +    return std::make_pair(D->Section->Repl, D->Value);
>> +  const uint8_t *BufLoc = getSectionData().begin() + Rel.r_offset;
>> +  uintX_t Addend =
>> +      Target->getImplicitAddend(BufLoc, Rel.getType(Config->Mips64EL));
>> +  return std::make_pair(D->Section->Repl, D->Value + Addend);
>> +}
>> +
>>  template <class ELFT>
>> -InputSectionBase<ELFT> *
>> +std::pair<InputSectionBase<ELFT> *, typename ELFT::uint>
>>  InputSectionBase<ELFT>::getRelocTarget(const Elf_Rela &Rel) const {
>> -  return getRelocTarget(reinterpret_cast<const Elf_Rel &>(Rel));
>> +  auto *D = getRelocTargetSym(File, Rel);
>> +  if (!D)
>> +    return std::make_pair(nullptr, 0);
>> +  if (!D->isSection())
>> +    return std::make_pair(D->Section->Repl, D->Value);
>> +  return std::make_pair(D->Section->Repl, D->Value + Rel.r_addend);
>>  }
>>
>>  template <class ELFT>
>> @@ -368,10 +386,49 @@ typename ELFT::uint EHInputSection<ELFT>
>>    return Base + Addend;
>>  }
>>
>> +static size_t findNull(StringRef S, size_t EntSize) {
>> +  // Optimize the common case.
>> +  if (EntSize == 1)
>> +    return S.find(0);
>> +
>> +  for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
>> +    const char *B = S.begin() + I;
>> +    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
>> +      return I;
>> +  }
>> +  return StringRef::npos;
>> +}
>> +
>>  template <class ELFT>
>>  MergeInputSection<ELFT>::MergeInputSection(elf::ObjectFile<ELFT> *F,
>>                                             const Elf_Shdr *Header)
>> -    : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge)
>> {}
>> +    : SplitInputSection<ELFT>(F, Header, InputSectionBase<ELFT>::Merge) {
>> +  uintX_t EntSize = Header->sh_entsize;
>> +  ArrayRef<uint8_t> D = this->getSectionData();
>> +  StringRef Data((const char *)D.data(), D.size());
>> +  std::vector<std::pair<uintX_t, uintX_t>> &Offsets = this->Offsets;
>> +
>> +  uintX_t V = Config->GcSections ? -1 : 0;
>> +  if (Header->sh_flags & SHF_STRINGS) {
>> +    uintX_t Offset = 0;
>> +    while (!Data.empty()) {
>> +      size_t End = findNull(Data, EntSize);
>> +      if (End == StringRef::npos)
>> +        fatal("string is not null terminated");
>> +      Offsets.push_back(std::make_pair(Offset, V));
>> +      uintX_t Size = End + EntSize;
>> +      Data = Data.substr(Size);
>> +      Offset += Size;
>> +    }
>> +    return;
>> +  }
>> +
>> +  // If this is not of type string, every entry has the same size.
>> +  size_t Size = Data.size();
>> +  assert((Size % EntSize) == 0);
>> +  for (unsigned I = 0, N = Size; I != N; I += EntSize)
>> +    Offsets.push_back(std::make_pair(I, V));
>> +}
>>
>>  template <class ELFT>
>>  bool MergeInputSection<ELFT>::classof(const InputSectionBase<ELFT> *S) {
>>
>> Modified: lld/trunk/ELF/InputSection.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=267164&r1=267163&r2=267164&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/InputSection.h (original)
>> +++ lld/trunk/ELF/InputSection.h Fri Apr 22 11:46:08 2016
>> @@ -124,8 +124,10 @@ public:
>>    ArrayRef<uint8_t> getSectionData() const;
>>
>>    // Returns a section that Rel is pointing to. Used by the garbage
>> collector.
>> -  InputSectionBase<ELFT> *getRelocTarget(const Elf_Rel &Rel) const;
>> -  InputSectionBase<ELFT> *getRelocTarget(const Elf_Rela &Rel) const;
>> +  std::pair<InputSectionBase<ELFT> *, uintX_t>
>> +  getRelocTarget(const Elf_Rel &Rel) const;
>> +  std::pair<InputSectionBase<ELFT> *, uintX_t>
>> +  getRelocTarget(const Elf_Rela &Rel) const;
>>
>>    void relocate(uint8_t *Buf, uint8_t *BufEnd);
>>    std::vector<Relocation> Relocations;
>>
>> Modified: lld/trunk/ELF/MarkLive.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/MarkLive.cpp?rev=267164&r1=267163&r2=267164&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/MarkLive.cpp (original)
>> +++ lld/trunk/ELF/MarkLive.cpp Fri Apr 22 11:46:08 2016
>> @@ -40,20 +40,29 @@ using namespace lld::elf;
>>
>>  // Calls Fn for each section that Sec refers to via relocations.
>>  template <class ELFT>
>> -static void forEachSuccessor(InputSection<ELFT> *Sec,
>> -                             std::function<void(InputSectionBase<ELFT>
>> *)> Fn) {
>> +static void forEachSuccessor(
>> +    InputSection<ELFT> *Sec,
>> +    std::function<void(InputSectionBase<ELFT> *, typename ELFT::uint
>> Offset)>
>> +        Fn) {
>>    typedef typename ELFT::Rel Elf_Rel;
>>    typedef typename ELFT::Rela Elf_Rela;
>>    typedef typename ELFT::Shdr Elf_Shdr;
>> +  typedef typename ELFT::uint uintX_t;
>>
>>    ELFFile<ELFT> &Obj = Sec->getFile()->getObj();
>>    for (const Elf_Shdr *RelSec : Sec->RelocSections) {
>>      if (RelSec->sh_type == SHT_RELA) {
>> -      for (const Elf_Rela &RI : Obj.relas(RelSec))
>> -        Fn(Sec->getRelocTarget(RI));
>> +      for (const Elf_Rela &RI : Obj.relas(RelSec)) {
>> +        std::pair<InputSectionBase<ELFT> *, uintX_t> P =
>> +            Sec->getRelocTarget(RI);
>> +        Fn(P.first, P.second);
>> +      }
>>      } else {
>> -      for (const Elf_Rel &RI : Obj.rels(RelSec))
>> -        Fn(Sec->getRelocTarget(RI));
>> +      for (const Elf_Rel &RI : Obj.rels(RelSec)) {
>> +        std::pair<InputSectionBase<ELFT> *, uintX_t> P =
>> +            Sec->getRelocTarget(RI);
>> +        Fn(P.first, P.second);
>> +      }
>>      }
>>    }
>>  }
>> @@ -85,10 +94,18 @@ template <class ELFT> static bool isRese
>>  // Starting from GC-root sections, this function visits all reachable
>>  // sections to set their "Live" bits.
>>  template <class ELFT> void elf::markLive(SymbolTable<ELFT> *Symtab) {
>> +  typedef typename ELFT::uint uintX_t;
>>    SmallVector<InputSection<ELFT> *, 256> Q;
>>
>> -  auto Enqueue = [&](InputSectionBase<ELFT> *Sec) {
>> -    if (!Sec || Sec->Live)
>> +  auto Enqueue = [&](InputSectionBase<ELFT> *Sec, uintX_t Offset) {
>> +    if (!Sec)
>> +      return;
>> +    if (auto *MS = dyn_cast<MergeInputSection<ELFT>>(Sec)) {
>> +      std::pair<std::pair<uintX_t, uintX_t> *, uintX_t> T =
>> +          MS->getRangeAndSize(Offset);
>> +      T.first->second = 0;
>> +    }
>> +    if (Sec->Live)
>>        return;
>>      Sec->Live = true;
>>      if (InputSection<ELFT> *S = dyn_cast<InputSection<ELFT>>(Sec))
>> @@ -98,7 +115,7 @@ template <class ELFT> void elf::markLive
>>    auto MarkSymbol = [&](SymbolBody *Sym) {
>>      if (Sym)
>>        if (auto *D = dyn_cast<DefinedRegular<ELFT>>(Sym))
>> -        Enqueue(D->Section);
>> +        Enqueue(D->Section, D->Value);
>>    };
>>
>>    // Add GC root symbols.
>> @@ -125,7 +142,7 @@ template <class ELFT> void elf::markLive
>>      for (InputSectionBase<ELFT> *Sec : F->getSections())
>>        if (Sec && Sec != &InputSection<ELFT>::Discarded)
>>          if (isReserved(Sec) || Script<ELFT>::X->shouldKeep(Sec))
>> -          Enqueue(Sec);
>> +          Enqueue(Sec, 0);
>>
>>    // Mark all reachable sections.
>>    while (!Q.empty())
>>
>> Modified: lld/trunk/ELF/OutputSections.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=267164&r1=267163&r2=267164&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/ELF/OutputSections.cpp (original)
>> +++ lld/trunk/ELF/OutputSections.cpp Fri Apr 22 11:46:08 2016
>> @@ -1132,7 +1132,7 @@ void EHOutputSection<ELFT>::addSectionAu
>>      } else {
>>        if (!HasReloc)
>>          fatal("FDE doesn't reference another section");
>> -      InputSectionBase<ELFT> *Target = S->getRelocTarget(*RelI);
>> +      InputSectionBase<ELFT> *Target = S->getRelocTarget(*RelI).first;
>>        if (Target && Target->Live) {
>>          uint32_t CieOffset = Offset + 4 - ID;
>>          auto I = OffsetToIndex.find(CieOffset);
>> @@ -1227,19 +1227,6 @@ template <class ELFT> void MergeOutputSe
>>    }
>>  }
>>
>> -static size_t findNull(StringRef S, size_t EntSize) {
>> -  // Optimize the common case.
>> -  if (EntSize == 1)
>> -    return S.find(0);
>> -
>> -  for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
>> -    const char *B = S.begin() + I;
>> -    if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
>> -      return I;
>> -  }
>> -  return StringRef::npos;
>> -}
>> -
>>  template <class ELFT>
>>  void MergeOutputSection<ELFT>::addSection(InputSectionBase<ELFT> *C) {
>>    auto *S = cast<MergeInputSection<ELFT>>(C);
>> @@ -1250,31 +1237,32 @@ void MergeOutputSection<ELFT>::addSectio
>>    StringRef Data((const char *)D.data(), D.size());
>>    uintX_t EntSize = S->getSectionHdr()->sh_entsize;
>>    this->Header.sh_entsize = EntSize;
>> +  MutableArrayRef<std::pair<uintX_t, uintX_t>> Offsets = S->Offsets;
>>
>>    // If this is of type string, the contents are null-terminated strings.
>>    if (this->Header.sh_flags & SHF_STRINGS) {
>> -    uintX_t Offset = 0;
>> -    while (!Data.empty()) {
>> -      size_t End = findNull(Data, EntSize);
>> -      if (End == StringRef::npos)
>> -        fatal("string is not null terminated");
>> -      StringRef Entry = Data.substr(0, End + EntSize);
>> +    for (unsigned I = 0, N = Offsets.size(); I != N; ++I) {
>> +      auto &P = Offsets[I];
>> +      if (P.second == (uintX_t)-1)
>> +        continue;
>> +
>> +      uintX_t Start = P.first;
>> +      uintX_t End = (I == N - 1) ? Data.size() : Offsets[I + 1].first;
>> +      StringRef Entry = Data.substr(Start, End - Start);
>>        uintX_t OutputOffset = Builder.add(Entry);
>>        if (shouldTailMerge())
>>          OutputOffset = -1;
>> -      S->Offsets.push_back(std::make_pair(Offset, OutputOffset));
>> -      uintX_t Size = End + EntSize;
>> -      Data = Data.substr(Size);
>> -      Offset += Size;
>> +      P.second = OutputOffset;
>>      }
>>      return;
>>    }
>>
>>    // If this is not of type string, every entry has the same size.
>> -  for (unsigned I = 0, N = Data.size(); I != N; I += EntSize) {
>> -    StringRef Entry = Data.substr(I, EntSize);
>> -    size_t OutputOffset = Builder.add(Entry);
>> -    S->Offsets.push_back(std::make_pair(I, OutputOffset));
>> +  for (auto &P : Offsets) {
>> +    if (P.second == (uintX_t)-1)
>> +      continue;
>> +    StringRef Entry = Data.substr(P.first, EntSize);
>> +    P.second = Builder.add(Entry);
>>    }
>>  }
>>
>>
>> Added: lld/trunk/test/ELF/gc-sections-merge-addend.s
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-sections-merge-addend.s?rev=267164&view=auto
>>
>> ==============================================================================
>> --- lld/trunk/test/ELF/gc-sections-merge-addend.s (added)
>> +++ lld/trunk/test/ELF/gc-sections-merge-addend.s Fri Apr 22 11:46:08 2016
>> @@ -0,0 +1,39 @@
>> +// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=x86_64-pc-linux
>> +// RUN: ld.lld %t.o -o %t.so -shared --gc-sections
>> +// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
>> +
>> +
>> +// CHECK:      Name: .rodata
>> +// CHECK-NEXT: Type: SHT_PROGBITS
>> +// CHECK-NEXT: Flags [
>> +// CHECK-NEXT:   SHF_ALLOC
>> +// CHECK-NEXT:   SHF_MERGE
>> +// CHECK-NEXT:   SHF_STRINGS
>> +// CHECK-NEXT: ]
>> +// CHECK-NEXT: Address:
>> +// CHECK-NEXT: Offset:
>> +// CHECK-NEXT: Size: 4
>> +// CHECK-NEXT: Link: 0
>> +// CHECK-NEXT: Info: 0
>> +// CHECK-NEXT: AddressAlignment: 1
>> +// CHECK-NEXT: EntrySize: 1
>> +// CHECK-NEXT: SectionData (
>> +// CHECK-NEXT:   0000: 62617200                    |bar.|
>> +// CHECK-NEXT: )
>> +
>> +        .section        .data.f,"aw", at progbits
>> +        .globl  f
>> +f:
>> +        .quad .rodata.str1.1 + 4
>> +
>> +        .section        .data.g,"aw", at progbits
>> +        .hidden g
>> +        .globl  g
>> +g:
>> +        .quad .rodata.str1.1
>> +
>> +        .section        .rodata.str1.1,"aMS", at progbits,1
>> +.L.str:
>> +        .asciz  "foo"
>> +.L.str.1:
>> +        .asciz  "bar"
>>
>> Added: lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s?rev=267164&view=auto
>>
>> ==============================================================================
>> --- lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s (added)
>> +++ lld/trunk/test/ELF/gc-sections-merge-implicit-addend.s Fri Apr 22
>> 11:46:08 2016
>> @@ -0,0 +1,39 @@
>> +// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=i386-pc-linux
>> +// RUN: ld.lld %t.o -o %t.so -shared --gc-sections
>> +// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
>> +
>> +
>> +// CHECK:      Name: .rodata
>> +// CHECK-NEXT: Type: SHT_PROGBITS
>> +// CHECK-NEXT: Flags [
>> +// CHECK-NEXT:   SHF_ALLOC
>> +// CHECK-NEXT:   SHF_MERGE
>> +// CHECK-NEXT:   SHF_STRINGS
>> +// CHECK-NEXT: ]
>> +// CHECK-NEXT: Address:
>> +// CHECK-NEXT: Offset:
>> +// CHECK-NEXT: Size: 4
>> +// CHECK-NEXT: Link: 0
>> +// CHECK-NEXT: Info: 0
>> +// CHECK-NEXT: AddressAlignment: 1
>> +// CHECK-NEXT: EntrySize: 1
>> +// CHECK-NEXT: SectionData (
>> +// CHECK-NEXT:   0000: 62617200                    |bar.|
>> +// CHECK-NEXT: )
>> +
>> +        .section        .data.f,"aw", at progbits
>> +        .globl  f
>> +f:
>> +        .long .rodata.str1.1 + 4
>> +
>> +        .section        .data.g,"aw", at progbits
>> +        .hidden g
>> +        .globl  g
>> +g:
>> +        .long .rodata.str1.1
>> +
>> +        .section        .rodata.str1.1,"aMS", at progbits,1
>> +.L.str:
>> +        .asciz  "foo"
>> +.L.str.1:
>> +        .asciz  "bar"
>>
>> Added: lld/trunk/test/ELF/gc-sections-merge.s
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gc-sections-merge.s?rev=267164&view=auto
>>
>> ==============================================================================
>> --- lld/trunk/test/ELF/gc-sections-merge.s (added)
>> +++ lld/trunk/test/ELF/gc-sections-merge.s Fri Apr 22 11:46:08 2016
>> @@ -0,0 +1,61 @@
>> +// RUN: llvm-mc %s -o %t.o -filetype=obj -triple=x86_64-pc-linux
>> +// RUN: ld.lld %t.o -o %t.so -shared
>> +// RUN: ld.lld %t.o -o %t.gc.so -shared --gc-sections
>> +// RUN: llvm-readobj -s -section-data %t.so | FileCheck %s
>> +// RUN: llvm-readobj -s -section-data %t.gc.so | FileCheck
>> --check-prefix=GC %s
>> +
>> +
>> +// CHECK:      Name: .rodata
>> +// CHECK-NEXT: Type: SHT_PROGBITS
>> +// CHECK-NEXT: Flags [
>> +// CHECK-NEXT:   SHF_ALLOC
>> +// CHECK-NEXT:   SHF_MERGE
>> +// CHECK-NEXT:   SHF_STRINGS
>> +// CHECK-NEXT: ]
>> +// CHECK-NEXT: Address:
>> +// CHECK-NEXT: Offset:
>> +// CHECK-NEXT: Size: 8
>> +// CHECK-NEXT: Link: 0
>> +// CHECK-NEXT: Info: 0
>> +// CHECK-NEXT: AddressAlignment: 1
>> +// CHECK-NEXT: EntrySize: 1
>> +// CHECK-NEXT: SectionData (
>> +// CHECK-NEXT:   0000: 666F6F00 62617200                    |foo.bar.|
>> +// CHECK-NEXT: )
>> +
>> +// GC:      Name: .rodata
>> +// GC-NEXT: Type: SHT_PROGBITS
>> +// GC-NEXT: Flags [
>> +// GC-NEXT:   SHF_ALLOC
>> +// GC-NEXT:   SHF_MERGE
>> +// GC-NEXT:   SHF_STRINGS
>> +// GC-NEXT: ]
>> +// GC-NEXT: Address:
>> +// GC-NEXT: Offset:
>> +// GC-NEXT: Size: 4
>> +// GC-NEXT: Link: 0
>> +// GC-NEXT: Info: 0
>> +// GC-NEXT: AddressAlignment: 1
>> +// GC-NEXT: EntrySize: 1
>> +// GC-NEXT: SectionData (
>> +// GC-NEXT:   0000: 666F6F00                                |foo.|
>> +// GC-NEXT: )
>> +
>> +        .section        .text.f,"ax", at progbits
>> +        .globl  f
>> +f:
>> +        leaq    .L.str(%rip), %rax
>> +        retq
>> +
>> +        .section        .text.g,"ax", at progbits
>> +        .hidden g
>> +        .globl  g
>> +g:
>> +        leaq    .L.str.1(%rip), %rax
>> +        retq
>> +
>> +        .section        .rodata.str1.1,"aMS", at progbits,1
>> +.L.str:
>> +        .asciz  "foo"
>> +.L.str.1:
>> +        .asciz  "bar"
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
>
>
> --
> --
> Peter


More information about the llvm-commits mailing list