[PATCH] D41046: [ELF] Make overlapping output sections an error

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 14 20:15:06 PST 2017


Alexander Richardson via Phabricator <reviews at reviews.llvm.org> writes:

> arichardson updated this revision to Diff 126272.
> arichardson added a comment.
>
> Speed up lld invocation in test/ELF/many-alloc-sections.s lld invocation from 290s to <1s by not checking empty sections

What happens if the sections are 1 byte long? What is the performance
impact that you get with this patch when linknig the linux kernel?

>
> Repository:
>   rLLD LLVM Linker
>
> https://reviews.llvm.org/D41046
>
> Files:
>   ELF/Config.h
>   ELF/Relocations.cpp
>   ELF/Writer.cpp
>   test/ELF/linkerscript/overlapping-sections.s
>
> Index: test/ELF/linkerscript/overlapping-sections.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/linkerscript/overlapping-sections.s
> @@ -0,0 +1,174 @@
> +# TODO: maybe this should be converted to an x86 test to get more buildbot coverage
> +# REQUIRES: mips
> +# RUN: llvm-mc -filetype=obj -triple=mips64-unknown-linux %s -o %t.o
> +
> +# RUN: echo "SECTIONS { \
> +# RUN:   .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN:   .sec2 0x8800 : { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t.script
> +# RUN: ld.lld -o %t.so --script %t.script %t.o -shared
> +# RUN: llvm-readobj -sections -program-headers %t.so | FileCheck %s -check-prefix GOOD
> +
> +# GOOD:        Name: .sec1
> +# GOOD-NEXT:   Type: SHT_PROGBITS (0x1)
> +# GOOD-NEXT:   Flags [ (0x3)
> +# GOOD-NEXT:     SHF_ALLOC (0x2)
> +# GOOD-NEXT:     SHF_WRITE (0x1)
> +# GOOD-NEXT:   ]
> +# GOOD-NEXT:   Address: 0x8000
> +# GOOD-NEXT:   Offset: 0x18000
> +# GOOD-NEXT:   Size: 256
> +
> +# GOOD:        Name: .sec2
> +# GOOD-NEXT:   Type: SHT_PROGBITS (0x1)
> +# GOOD-NEXT:   Flags [ (0x3)
> +# GOOD-NEXT:     SHF_ALLOC (0x2)
> +# GOOD-NEXT:     SHF_WRITE (0x1)
> +# GOOD-NEXT:   ]
> +# GOOD-NEXT:   Address: 0x8800
> +# GOOD-NEXT:   Offset: 0x18800
> +# GOOD-NEXT:   Size: 256
> +
> +# GOOD:      ProgramHeaders [
> +# GOOD-NEXT:  ProgramHeader {
> +# GOOD-NEXT:    Type: PT_LOAD (0x1)
> +# GOOD-NEXT:    Offset: 0x10000
> +# GOOD-NEXT:    VirtualAddress: 0x0
> +# GOOD-NEXT:    PhysicalAddress: 0x0
> +# GOOD-NEXT:    FileSize: 481
> +# GOOD-NEXT:    MemSize: 481
> +# GOOD-NEXT:    Flags [ (0x5)
> +# GOOD-NEXT:      PF_R (0x4)
> +# GOOD-NEXT:      PF_X (0x1)
> +# GOOD-NEXT:    ]
> +# GOOD-NEXT:    Alignment: 65536
> +# GOOD-NEXT:  }
> +# GOOD-NEXT:  ProgramHeader {
> +# GOOD-NEXT:    Type: PT_LOAD (0x1)
> +# GOOD-NEXT:    Offset: 0x18000
> +# GOOD-NEXT:    VirtualAddress: 0x8000
> +# GOOD-NEXT:    PhysicalAddress: 0x8000
> +# GOOD-NEXT:    FileSize: 2320
> +# GOOD-NEXT:    MemSize: 2320
> +# GOOD-NEXT:    Flags [ (0x6)
> +# GOOD-NEXT:      PF_R (0x4)
> +# GOOD-NEXT:      PF_W (0x2)
> +# GOOD-NEXT:    ]
> +# GOOD-NEXT:    Alignment: 65536
> +# GOOD-NEXT:  }
> +
> +# LLD used to accept overlapping sections and create binaries that potentially
> +# crash at runtime while ld.bfd gives the following error:
> +# ld.bfd: section .sec2 [0000000000008080 -> 000000000000817f] overlaps section .sec1 [0000000000008000 -> 00000000000080ff]
> +
> +# RUN: echo "SECTIONS { \
> +# RUN:   .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN:   .sec2 0x8800 : AT(0x8080) { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t-lma.script
> +# RUN: not ld.lld -o %t.so --script %t-lma.script %t.o -shared 2>&1 | FileCheck %s -check-prefix LMA-OVERLAP-ERR
> +# LMA-OVERLAP-ERR:      error: section .sec1 load address range overlaps with .sec2
> +# LMA-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# LMA-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8080 -> 0x817F]
> +
> +# check that we create the expected binary with --noinhibit-exec:
> +# RUN: ld.lld -o %t.so --script %t-lma.script %t.o -shared --noinhibit-exec
> +# It seems like the only way to get the section -> segment mapping is using
> +# llvm-readobj with GNU output style instead of LLVM style
> +# We need this in order to verify that the .sec2 was indeed placed in a
> +# PT_LOAD where the PhysAddr overlaps with where .sec1 is loaded
> +# RUN: llvm-readobj -sections -program-headers -elf-output-style=GNU %t.so | FileCheck %s -check-prefix BAD-LMA
> +# BAD-LMA-LABEL: Section Headers:
> +# BAD-LMA: .sec1             PROGBITS        0000000000008000 018000 000100 00  WA  0   0  1
> +# BAD-LMA: .sec2             PROGBITS        0000000000008800 018800 000100 00  WA  0   0  1
> +# BAD-LMA-LABEL: Program Headers:
> +# BAD-LMA-NEXT:  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
> +# BAD-LMA-NEXT:  LOAD           0x010000 0x0000000000000000 0x0000000000000000 0x0001e1 0x0001e1 R E 0x10000
> +# BAD-LMA-NEXT:  LOAD           0x018000 0x0000000000008000 0x0000000000008000 0x000100 0x000100 RW  0x10000
> +# BAD-LMA-NEXT:  LOAD           0x018800 0x0000000000008800 0x0000000000008080 0x000110 0x000110 RW  0x10000
> +# BAD-LMA-LABEL: Section to Segment mapping:
> +# BAD-LMA:  01     .sec1
> +# BAD-LMA:  02     .sec2 .data .got
> +
> +
> +# Now try a script where the virtual memory addresses overlap but ensure that the
> +# load addresses don't:
> +# RUN: echo "SECTIONS { \
> +# RUN:   .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN:   .sec2 0x8020 : AT(0x8800) { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t-vaddr.script
> +# RUN: not ld.lld -o %t.so --script %t-vaddr.script %t.o -shared 2>&1 | FileCheck %s -check-prefix VADDR-OVERLAP-ERR
> +# VADDR-OVERLAP-ERR:      error: section .sec1 virtual address range overlaps with .sec2
> +# VADDR-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# VADDR-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8020 -> 0x811F]
> +
> +# Check that the expected binary was created with --noinhibit-exec:
> +# RUN: ld.lld -o %t.so --script %t-vaddr.script %t.o -shared --noinhibit-exec
> +# RUN: llvm-readobj -sections -program-headers -elf-output-style=GNU %t.so | FileCheck %s -check-prefix BAD-VADDR
> +# BAD-VADDR-LABEL: Section Headers:
> +# BAD-VADDR: .sec1             PROGBITS        0000000000008000 018000 000100 00  WA  0   0  1
> +# BAD-VADDR: .sec2             PROGBITS        0000000000008020 028020 000100 00  WA  0   0  1
> +# BAD-VADDR-LABEL: Program Headers:
> +# BAD-VADDR-NEXT:  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
> +# BAD-VADDR-NEXT:  LOAD           0x010000 0x0000000000000000 0x0000000000000000 0x0001e1 0x0001e1 R E 0x10000
> +# BAD-VADDR-NEXT:  LOAD           0x018000 0x0000000000008000 0x0000000000008000 0x000100 0x000100 RW  0x10000
> +# BAD-VADDR-NEXT:  LOAD           0x028020 0x0000000000008020 0x0000000000008800 0x000110 0x000110 RW  0x10000
> +# BAD-VADDR-LABEL: Section to Segment mapping:
> +# BAD-VADDR:  01     .sec1
> +# BAD-VADDR:  02     .sec2 .data .got
> +
> +# Finally check the case where both LMA and vaddr overlap
> +
> +# RUN: echo "SECTIONS { \
> +# RUN:   .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN:   .sec2 0x8040 : { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t-both-overlap.script
> +
> +# RUN: not ld.lld -o %t.so --script %t-both-overlap.script %t.o -shared 2>&1 | FileCheck %s -check-prefix BOTH-OVERLAP-ERR
> +
> +# FIXME: we should not be creating binaries where file offsets overlap!
> +# ld.bfd seems to map .sec1 to file offset 18000 and .sec2 to 18100 so that
> +# only virtual addr and LMA overlap
> +# BOTH-OVERLAP-ERR:      error: section .sec1 file range overlaps with .sec2
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x18000 -> 0x180FF]
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x18040 -> 0x1813F]
> +# BOTH-OVERLAP-ERR:      error: section .sec1 virtual address range overlaps with .sec2
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8040 -> 0x813F]
> +# BOTH-OVERLAP-ERR:      error: section .sec1 load address range overlaps with .sec2
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8040 -> 0x813F]
> +
> +# Check that the expected binary was created with --noinhibit-exec:
> +# RUN: ld.lld -o %t.so --script %t-both-overlap.script %t.o -shared --noinhibit-exec
> +
> +# FIXME: this should not actually happen (but at least it needs --noinhibit-exec now)
> +# RUN: llvm-objdump -s %t.so | FileCheck %s -check-prefix BROKEN-OUTPUT-FILE
> +# BROKEN-OUTPUT-FILE-LABEL: Contents of section .sec1:
> +# BROKEN-OUTPUT-FILE-NEXT: 8000 01010101 01010101 01010101 01010101  ................
> +# BROKEN-OUTPUT-FILE-NEXT: 8010 01010101 01010101 01010101 01010101  ................
> +# BROKEN-OUTPUT-FILE-NEXT: 8020 01010101 01010101 01010101 01010101  ................
> +# BROKEN-OUTPUT-FILE-NEXT: 8030 01010101 01010101 01010101 01010101  ................
> +# Starting here the contents of .sec2 overwrites .sec1:
> +# BROKEN-OUTPUT-FILE-NEXT: 8040 02020202 02020202 02020202 02020202  ................
> +
> +# RUN: llvm-readobj -sections -program-headers -elf-output-style=GNU %t.so | FileCheck %s -check-prefix BAD-BOTH
> +# BAD-BOTH-LABEL: Section Headers:
> +# BAD-BOTH: .sec1             PROGBITS        0000000000008000 018000 000100 00  WA  0   0  1
> +# BAD-BOTH: .sec2             PROGBITS        0000000000008040 018040 000100 00  WA  0   0  1
> +# BAD-BOTH-LABEL: Program Headers:
> +# BAD-BOTH-NEXT:  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
> +# BAD-BOTH-NEXT:  LOAD           0x010000 0x0000000000000000 0x0000000000000000 0x0001e1 0x0001e1 R E 0x10000
> +# BAD-BOTH-NEXT:  LOAD           0x018000 0x0000000000008000 0x0000000000008000 0x000150 0x000150 RW  0x10000
> +# BAD-BOTH-LABEL: Section to Segment mapping:
> +# BAD-BOTH:   01     .sec1 .sec2 .data .got
> +
> +
> +.section        .first_sec,"aw", at progbits
> +.rept 0x100
> +.byte 1
> +.endr
> +
> +.section        .second_sec,"aw", at progbits
> +.rept 0x100
> +.byte 2
> +.endr
> Index: ELF/Writer.cpp
> ===================================================================
> --- ELF/Writer.cpp
> +++ ELF/Writer.cpp
> @@ -61,6 +61,7 @@
>    void assignFileOffsets();
>    void assignFileOffsetsBinary();
>    void setPhdrs();
> +  void checkNoOverlappingSections();
>    void fixSectionAlignments();
>    void openFile();
>    void writeTrapInstr();
> @@ -459,6 +460,8 @@
>        Sec->Addr = 0;
>    }
>  
> +  checkNoOverlappingSections();
> +
>    // It does not make sense try to open the file if we have error already.
>    if (errorCount())
>      return;
> @@ -1739,6 +1742,112 @@
>    }
>  }
>  
> +// TODO: this is quite similar to clang::tooling::Range, maybe there should be
> +// a class in LLVM that can be used by both
> +struct AddressRange {
> +  AddressRange() : Address(0), Length(0) {}
> +  AddressRange(uint64_t Address, uint64_t Length)
> +      : Address(Address), Length(Length) {}
> +
> +  bool overlapsWith(AddressRange RHS) const {
> +    return Address + Length > RHS.Address && Address < RHS.Address + RHS.Length;
> +  }
> +
> +  std::string toString() {
> +    if (Length == 0)
> +      return "<emtpy range at 0x" + llvm::utohexstr(Address) + ">";
> +    return "[0x" + llvm::utohexstr(Address) + " -> 0x" +
> +           llvm::utohexstr(Address + Length - 1) + "]";
> +  }
> +
> +  uint64_t Address;
> +  uint64_t Length;
> +};
> +
> +template <typename Getter, typename Predicate>
> +static void checkForSectionOverlap(std::vector<OutputSection *> &Sections,
> +                                   StringRef Kind, Getter GetStart,
> +                                   Predicate ShouldSkip) {
> +  // Instead of comparing every OutputSection with every other output section
> +  // we create a copy of the output sections that we sort by the address that
> +  // is currently being checked. This way we only print an error about
> +  // overlapping sections once.
> +  std::sort(Sections.begin(), Sections.end(),
> +            [=](const OutputSection *A, const OutputSection *B) {
> +              return GetStart(A) < GetStart(B);
> +            });
> +  for (size_t i = 0; i < Sections.size(); ++i) {
> +    auto *Sec = Sections[i];
> +    if (ShouldSkip(Sec))
> +      continue;
> +    AddressRange SecRange{GetStart(Sec), Sec->Size};
> +    for (OutputSection *Other :
> +         ArrayRef<OutputSection *>(Sections).slice(i + 1)) {
> +      if (ShouldSkip(Other))
> +        continue;
> +      AddressRange OtherRange{GetStart(Other), Other->Size};
> +      if (SecRange.overlapsWith(OtherRange))
> +        errorOrWarn("section " + Sec->Name + " " + Kind +
> +                    " range overlaps with " + Other->Name + "\n>>> " +
> +                    Sec->Name + " range is " + SecRange.toString() + "\n>>> " +
> +                    Other->Name + " range is " + OtherRange.toString());
> +      // Since Sections is sorted by start address, we can exit this loop as
> +      // soon as the first other output section starts after the end of Sec
> +      else if (OtherRange.Address >= SecRange.Address + SecRange.Length)
> +        break;
> +    }
> +  }
> +}
> +
> +// Check for overlapping sections
> +//
> +// In this function we check that none of the output sections have overlapping
> +// file offsets. For SHF_ALLOC sections we also check that the load address
> +// ranges and the virtual address ranges don't overlap
> +template <class ELFT> void Writer<ELFT>::checkNoOverlappingSections() {
> +  std::vector<OutputSection *> Sections(OutputSections);
> +  // Speed up the check for overlapping sections by removing all sections with
> +  // zero size since they can't ever overlap
> +  erase_if(Sections, [](const OutputSection *Sec) { return Sec->Size == 0; });
> +  // First check for overlapping file offsets. In this case we need to skip
> +  // Any section marked as SHT_NOBITS. These sections don't actually occupy
> +  // space in the file so Sec->Offset + Sec->Size can overlap with others.
> +  // If --oformat binary is specified only add SHF_ALLOC sections are added to
> +  // the output file so we skip any non-allocated sections in that case.
> +  checkForSectionOverlap(
> +      Sections, "file", [](const OutputSection *Sec) { return Sec->Offset; },
> +      [](const OutputSection *Sec) {
> +        return Sec->Type == SHT_NOBITS ||
> +               (Config->OFormatBinary && (Sec->Flags & SHF_ALLOC) == 0);
> +      });
> +
> +  // When linking with -r there is no need to check for overlapping virtual/load
> +  // addresses since those addresses will only be assigned when the final
> +  // executable/shared object is created
> +  if (Config->Relocatable)
> +    return;
> +
> +  // Now compare the virtual memory addresses. We only need to do this
> +  // for SHF_ALLOC sections since others will not be loaded. Furthermore, we
> +  // also need to skip SHF_TLS sections since these will be mapped to other
> +  // addresses at runtime and can therefore have overlapping ranges in the file
> +  checkForSectionOverlap(Sections, "virtual address",
> +                         [](const OutputSection *Sec) { return Sec->Addr; },
> +                         [](const OutputSection *Sec) {
> +                           return (Sec->Flags & SHF_ALLOC) == 0 ||
> +                                  (Sec->Flags & SHF_TLS);
> +                         });
> +  // Finally, check that the load addresses don't overlap. This will usually be
> +  // the same as the virtual addresses but can be different when using a linker
> +  // script with AT()
> +  checkForSectionOverlap(Sections, "load address",
> +                         [](const OutputSection *Sec) { return Sec->getLMA(); },
> +                         [](const OutputSection *Sec) {
> +                           return (Sec->Flags & SHF_ALLOC) == 0 ||
> +                                  (Sec->Flags & SHF_TLS);
> +                         });
> +}
> +
>  // The entry point address is chosen in the following ways.
>  //
>  // 1. the '-e' entry command-line option;
> Index: ELF/Relocations.cpp
> ===================================================================
> --- ELF/Relocations.cpp
> +++ ELF/Relocations.cpp
> @@ -547,13 +547,6 @@
>    In<ELFT>::RelaDyn->addReloc({Target->CopyRel, Sec, 0, false, SS, 0});
>  }
>  
> -static void errorOrWarn(const Twine &Msg) {
> -  if (!Config->NoinhibitExec)
> -    error(Msg);
> -  else
> -    warn(Msg);
> -}
> -
>  template <class ELFT>
>  static RelExpr adjustExpr(Symbol &Sym, RelExpr Expr, RelType Type,
>                            InputSectionBase &S, uint64_t RelOff) {
> Index: ELF/Config.h
> ===================================================================
> --- ELF/Config.h
> +++ ELF/Config.h
> @@ -10,6 +10,7 @@
>  #ifndef LLD_ELF_CONFIG_H
>  #define LLD_ELF_CONFIG_H
>  
> +#include "lld/Common/ErrorHandler.h"
>  #include "llvm/ADT/MapVector.h"
>  #include "llvm/ADT/StringRef.h"
>  #include "llvm/ADT/StringSet.h"
> @@ -236,6 +237,12 @@
>  // The only instance of Configuration struct.
>  extern Configuration *Config;
>  
> +static inline void errorOrWarn(const Twine &Msg) {
> +  if (!Config->NoinhibitExec)
> +    error(Msg);
> +  else
> +    warn(Msg);
> +}
>  } // namespace elf
>  } // namespace lld
>  


More information about the llvm-commits mailing list