[PATCH] D41046: [ELF] Make overlapping output sections an error
Rafael Avila de Espindola via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 20:15:06 PST 2017
Alexander Richardson via Phabricator <reviews at reviews.llvm.org> writes:
> arichardson updated this revision to Diff 126272.
> arichardson added a comment.
>
> Speed up lld invocation in test/ELF/many-alloc-sections.s lld invocation from 290s to <1s by not checking empty sections
What happens if the sections are 1 byte long? What is the performance
impact that you get with this patch when linknig the linux kernel?
>
> Repository:
> rLLD LLVM Linker
>
> https://reviews.llvm.org/D41046
>
> Files:
> ELF/Config.h
> ELF/Relocations.cpp
> ELF/Writer.cpp
> test/ELF/linkerscript/overlapping-sections.s
>
> Index: test/ELF/linkerscript/overlapping-sections.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/linkerscript/overlapping-sections.s
> @@ -0,0 +1,174 @@
> +# TODO: maybe this should be converted to an x86 test to get more buildbot coverage
> +# REQUIRES: mips
> +# RUN: llvm-mc -filetype=obj -triple=mips64-unknown-linux %s -o %t.o
> +
> +# RUN: echo "SECTIONS { \
> +# RUN: .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN: .sec2 0x8800 : { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t.script
> +# RUN: ld.lld -o %t.so --script %t.script %t.o -shared
> +# RUN: llvm-readobj -sections -program-headers %t.so | FileCheck %s -check-prefix GOOD
> +
> +# GOOD: Name: .sec1
> +# GOOD-NEXT: Type: SHT_PROGBITS (0x1)
> +# GOOD-NEXT: Flags [ (0x3)
> +# GOOD-NEXT: SHF_ALLOC (0x2)
> +# GOOD-NEXT: SHF_WRITE (0x1)
> +# GOOD-NEXT: ]
> +# GOOD-NEXT: Address: 0x8000
> +# GOOD-NEXT: Offset: 0x18000
> +# GOOD-NEXT: Size: 256
> +
> +# GOOD: Name: .sec2
> +# GOOD-NEXT: Type: SHT_PROGBITS (0x1)
> +# GOOD-NEXT: Flags [ (0x3)
> +# GOOD-NEXT: SHF_ALLOC (0x2)
> +# GOOD-NEXT: SHF_WRITE (0x1)
> +# GOOD-NEXT: ]
> +# GOOD-NEXT: Address: 0x8800
> +# GOOD-NEXT: Offset: 0x18800
> +# GOOD-NEXT: Size: 256
> +
> +# GOOD: ProgramHeaders [
> +# GOOD-NEXT: ProgramHeader {
> +# GOOD-NEXT: Type: PT_LOAD (0x1)
> +# GOOD-NEXT: Offset: 0x10000
> +# GOOD-NEXT: VirtualAddress: 0x0
> +# GOOD-NEXT: PhysicalAddress: 0x0
> +# GOOD-NEXT: FileSize: 481
> +# GOOD-NEXT: MemSize: 481
> +# GOOD-NEXT: Flags [ (0x5)
> +# GOOD-NEXT: PF_R (0x4)
> +# GOOD-NEXT: PF_X (0x1)
> +# GOOD-NEXT: ]
> +# GOOD-NEXT: Alignment: 65536
> +# GOOD-NEXT: }
> +# GOOD-NEXT: ProgramHeader {
> +# GOOD-NEXT: Type: PT_LOAD (0x1)
> +# GOOD-NEXT: Offset: 0x18000
> +# GOOD-NEXT: VirtualAddress: 0x8000
> +# GOOD-NEXT: PhysicalAddress: 0x8000
> +# GOOD-NEXT: FileSize: 2320
> +# GOOD-NEXT: MemSize: 2320
> +# GOOD-NEXT: Flags [ (0x6)
> +# GOOD-NEXT: PF_R (0x4)
> +# GOOD-NEXT: PF_W (0x2)
> +# GOOD-NEXT: ]
> +# GOOD-NEXT: Alignment: 65536
> +# GOOD-NEXT: }
> +
> +# LLD used to accept overlapping sections and create binaries that potentially
> +# crash at runtime while ld.bfd gives the following error:
> +# ld.bfd: section .sec2 [0000000000008080 -> 000000000000817f] overlaps section .sec1 [0000000000008000 -> 00000000000080ff]
> +
> +# RUN: echo "SECTIONS { \
> +# RUN: .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN: .sec2 0x8800 : AT(0x8080) { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t-lma.script
> +# RUN: not ld.lld -o %t.so --script %t-lma.script %t.o -shared 2>&1 | FileCheck %s -check-prefix LMA-OVERLAP-ERR
> +# LMA-OVERLAP-ERR: error: section .sec1 load address range overlaps with .sec2
> +# LMA-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# LMA-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8080 -> 0x817F]
> +
> +# check that we create the expected binary with --noinhibit-exec:
> +# RUN: ld.lld -o %t.so --script %t-lma.script %t.o -shared --noinhibit-exec
> +# It seems like the only way to get the section -> segment mapping is using
> +# llvm-readobj with GNU output style instead of LLVM style
> +# We need this in order to verify that the .sec2 was indeed placed in a
> +# PT_LOAD where the PhysAddr overlaps with where .sec1 is loaded
> +# RUN: llvm-readobj -sections -program-headers -elf-output-style=GNU %t.so | FileCheck %s -check-prefix BAD-LMA
> +# BAD-LMA-LABEL: Section Headers:
> +# BAD-LMA: .sec1 PROGBITS 0000000000008000 018000 000100 00 WA 0 0 1
> +# BAD-LMA: .sec2 PROGBITS 0000000000008800 018800 000100 00 WA 0 0 1
> +# BAD-LMA-LABEL: Program Headers:
> +# BAD-LMA-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
> +# BAD-LMA-NEXT: LOAD 0x010000 0x0000000000000000 0x0000000000000000 0x0001e1 0x0001e1 R E 0x10000
> +# BAD-LMA-NEXT: LOAD 0x018000 0x0000000000008000 0x0000000000008000 0x000100 0x000100 RW 0x10000
> +# BAD-LMA-NEXT: LOAD 0x018800 0x0000000000008800 0x0000000000008080 0x000110 0x000110 RW 0x10000
> +# BAD-LMA-LABEL: Section to Segment mapping:
> +# BAD-LMA: 01 .sec1
> +# BAD-LMA: 02 .sec2 .data .got
> +
> +
> +# Now try a script where the virtual memory addresses overlap but ensure that the
> +# load addresses don't:
> +# RUN: echo "SECTIONS { \
> +# RUN: .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN: .sec2 0x8020 : AT(0x8800) { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t-vaddr.script
> +# RUN: not ld.lld -o %t.so --script %t-vaddr.script %t.o -shared 2>&1 | FileCheck %s -check-prefix VADDR-OVERLAP-ERR
> +# VADDR-OVERLAP-ERR: error: section .sec1 virtual address range overlaps with .sec2
> +# VADDR-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# VADDR-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8020 -> 0x811F]
> +
> +# Check that the expected binary was created with --noinhibit-exec:
> +# RUN: ld.lld -o %t.so --script %t-vaddr.script %t.o -shared --noinhibit-exec
> +# RUN: llvm-readobj -sections -program-headers -elf-output-style=GNU %t.so | FileCheck %s -check-prefix BAD-VADDR
> +# BAD-VADDR-LABEL: Section Headers:
> +# BAD-VADDR: .sec1 PROGBITS 0000000000008000 018000 000100 00 WA 0 0 1
> +# BAD-VADDR: .sec2 PROGBITS 0000000000008020 028020 000100 00 WA 0 0 1
> +# BAD-VADDR-LABEL: Program Headers:
> +# BAD-VADDR-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
> +# BAD-VADDR-NEXT: LOAD 0x010000 0x0000000000000000 0x0000000000000000 0x0001e1 0x0001e1 R E 0x10000
> +# BAD-VADDR-NEXT: LOAD 0x018000 0x0000000000008000 0x0000000000008000 0x000100 0x000100 RW 0x10000
> +# BAD-VADDR-NEXT: LOAD 0x028020 0x0000000000008020 0x0000000000008800 0x000110 0x000110 RW 0x10000
> +# BAD-VADDR-LABEL: Section to Segment mapping:
> +# BAD-VADDR: 01 .sec1
> +# BAD-VADDR: 02 .sec2 .data .got
> +
> +# Finally check the case where both LMA and vaddr overlap
> +
> +# RUN: echo "SECTIONS { \
> +# RUN: .sec1 0x8000 : { sec1_start = .; *(.first_sec) sec1_end = .;} \
> +# RUN: .sec2 0x8040 : { sec2_start = .; *(.second_sec) sec2_end = .;} \
> +# RUN: }" > %t-both-overlap.script
> +
> +# RUN: not ld.lld -o %t.so --script %t-both-overlap.script %t.o -shared 2>&1 | FileCheck %s -check-prefix BOTH-OVERLAP-ERR
> +
> +# FIXME: we should not be creating binaries where file offsets overlap!
> +# ld.bfd seems to map .sec1 to file offset 18000 and .sec2 to 18100 so that
> +# only virtual addr and LMA overlap
> +# BOTH-OVERLAP-ERR: error: section .sec1 file range overlaps with .sec2
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x18000 -> 0x180FF]
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x18040 -> 0x1813F]
> +# BOTH-OVERLAP-ERR: error: section .sec1 virtual address range overlaps with .sec2
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8040 -> 0x813F]
> +# BOTH-OVERLAP-ERR: error: section .sec1 load address range overlaps with .sec2
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec1 range is [0x8000 -> 0x80FF]
> +# BOTH-OVERLAP-ERR-NEXT: >>> .sec2 range is [0x8040 -> 0x813F]
> +
> +# Check that the expected binary was created with --noinhibit-exec:
> +# RUN: ld.lld -o %t.so --script %t-both-overlap.script %t.o -shared --noinhibit-exec
> +
> +# FIXME: this should not actually happen (but at least it needs --noinhibit-exec now)
> +# RUN: llvm-objdump -s %t.so | FileCheck %s -check-prefix BROKEN-OUTPUT-FILE
> +# BROKEN-OUTPUT-FILE-LABEL: Contents of section .sec1:
> +# BROKEN-OUTPUT-FILE-NEXT: 8000 01010101 01010101 01010101 01010101 ................
> +# BROKEN-OUTPUT-FILE-NEXT: 8010 01010101 01010101 01010101 01010101 ................
> +# BROKEN-OUTPUT-FILE-NEXT: 8020 01010101 01010101 01010101 01010101 ................
> +# BROKEN-OUTPUT-FILE-NEXT: 8030 01010101 01010101 01010101 01010101 ................
> +# Starting here the contents of .sec2 overwrites .sec1:
> +# BROKEN-OUTPUT-FILE-NEXT: 8040 02020202 02020202 02020202 02020202 ................
> +
> +# RUN: llvm-readobj -sections -program-headers -elf-output-style=GNU %t.so | FileCheck %s -check-prefix BAD-BOTH
> +# BAD-BOTH-LABEL: Section Headers:
> +# BAD-BOTH: .sec1 PROGBITS 0000000000008000 018000 000100 00 WA 0 0 1
> +# BAD-BOTH: .sec2 PROGBITS 0000000000008040 018040 000100 00 WA 0 0 1
> +# BAD-BOTH-LABEL: Program Headers:
> +# BAD-BOTH-NEXT: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
> +# BAD-BOTH-NEXT: LOAD 0x010000 0x0000000000000000 0x0000000000000000 0x0001e1 0x0001e1 R E 0x10000
> +# BAD-BOTH-NEXT: LOAD 0x018000 0x0000000000008000 0x0000000000008000 0x000150 0x000150 RW 0x10000
> +# BAD-BOTH-LABEL: Section to Segment mapping:
> +# BAD-BOTH: 01 .sec1 .sec2 .data .got
> +
> +
> +.section .first_sec,"aw", at progbits
> +.rept 0x100
> +.byte 1
> +.endr
> +
> +.section .second_sec,"aw", at progbits
> +.rept 0x100
> +.byte 2
> +.endr
> Index: ELF/Writer.cpp
> ===================================================================
> --- ELF/Writer.cpp
> +++ ELF/Writer.cpp
> @@ -61,6 +61,7 @@
> void assignFileOffsets();
> void assignFileOffsetsBinary();
> void setPhdrs();
> + void checkNoOverlappingSections();
> void fixSectionAlignments();
> void openFile();
> void writeTrapInstr();
> @@ -459,6 +460,8 @@
> Sec->Addr = 0;
> }
>
> + checkNoOverlappingSections();
> +
> // It does not make sense try to open the file if we have error already.
> if (errorCount())
> return;
> @@ -1739,6 +1742,112 @@
> }
> }
>
> +// TODO: this is quite similar to clang::tooling::Range, maybe there should be
> +// a class in LLVM that can be used by both
> +struct AddressRange {
> + AddressRange() : Address(0), Length(0) {}
> + AddressRange(uint64_t Address, uint64_t Length)
> + : Address(Address), Length(Length) {}
> +
> + bool overlapsWith(AddressRange RHS) const {
> + return Address + Length > RHS.Address && Address < RHS.Address + RHS.Length;
> + }
> +
> + std::string toString() {
> + if (Length == 0)
> + return "<emtpy range at 0x" + llvm::utohexstr(Address) + ">";
> + return "[0x" + llvm::utohexstr(Address) + " -> 0x" +
> + llvm::utohexstr(Address + Length - 1) + "]";
> + }
> +
> + uint64_t Address;
> + uint64_t Length;
> +};
> +
> +template <typename Getter, typename Predicate>
> +static void checkForSectionOverlap(std::vector<OutputSection *> &Sections,
> + StringRef Kind, Getter GetStart,
> + Predicate ShouldSkip) {
> + // Instead of comparing every OutputSection with every other output section
> + // we create a copy of the output sections that we sort by the address that
> + // is currently being checked. This way we only print an error about
> + // overlapping sections once.
> + std::sort(Sections.begin(), Sections.end(),
> + [=](const OutputSection *A, const OutputSection *B) {
> + return GetStart(A) < GetStart(B);
> + });
> + for (size_t i = 0; i < Sections.size(); ++i) {
> + auto *Sec = Sections[i];
> + if (ShouldSkip(Sec))
> + continue;
> + AddressRange SecRange{GetStart(Sec), Sec->Size};
> + for (OutputSection *Other :
> + ArrayRef<OutputSection *>(Sections).slice(i + 1)) {
> + if (ShouldSkip(Other))
> + continue;
> + AddressRange OtherRange{GetStart(Other), Other->Size};
> + if (SecRange.overlapsWith(OtherRange))
> + errorOrWarn("section " + Sec->Name + " " + Kind +
> + " range overlaps with " + Other->Name + "\n>>> " +
> + Sec->Name + " range is " + SecRange.toString() + "\n>>> " +
> + Other->Name + " range is " + OtherRange.toString());
> + // Since Sections is sorted by start address, we can exit this loop as
> + // soon as the first other output section starts after the end of Sec
> + else if (OtherRange.Address >= SecRange.Address + SecRange.Length)
> + break;
> + }
> + }
> +}
> +
> +// Check for overlapping sections
> +//
> +// In this function we check that none of the output sections have overlapping
> +// file offsets. For SHF_ALLOC sections we also check that the load address
> +// ranges and the virtual address ranges don't overlap
> +template <class ELFT> void Writer<ELFT>::checkNoOverlappingSections() {
> + std::vector<OutputSection *> Sections(OutputSections);
> + // Speed up the check for overlapping sections by removing all sections with
> + // zero size since they can't ever overlap
> + erase_if(Sections, [](const OutputSection *Sec) { return Sec->Size == 0; });
> + // First check for overlapping file offsets. In this case we need to skip
> + // Any section marked as SHT_NOBITS. These sections don't actually occupy
> + // space in the file so Sec->Offset + Sec->Size can overlap with others.
> + // If --oformat binary is specified only add SHF_ALLOC sections are added to
> + // the output file so we skip any non-allocated sections in that case.
> + checkForSectionOverlap(
> + Sections, "file", [](const OutputSection *Sec) { return Sec->Offset; },
> + [](const OutputSection *Sec) {
> + return Sec->Type == SHT_NOBITS ||
> + (Config->OFormatBinary && (Sec->Flags & SHF_ALLOC) == 0);
> + });
> +
> + // When linking with -r there is no need to check for overlapping virtual/load
> + // addresses since those addresses will only be assigned when the final
> + // executable/shared object is created
> + if (Config->Relocatable)
> + return;
> +
> + // Now compare the virtual memory addresses. We only need to do this
> + // for SHF_ALLOC sections since others will not be loaded. Furthermore, we
> + // also need to skip SHF_TLS sections since these will be mapped to other
> + // addresses at runtime and can therefore have overlapping ranges in the file
> + checkForSectionOverlap(Sections, "virtual address",
> + [](const OutputSection *Sec) { return Sec->Addr; },
> + [](const OutputSection *Sec) {
> + return (Sec->Flags & SHF_ALLOC) == 0 ||
> + (Sec->Flags & SHF_TLS);
> + });
> + // Finally, check that the load addresses don't overlap. This will usually be
> + // the same as the virtual addresses but can be different when using a linker
> + // script with AT()
> + checkForSectionOverlap(Sections, "load address",
> + [](const OutputSection *Sec) { return Sec->getLMA(); },
> + [](const OutputSection *Sec) {
> + return (Sec->Flags & SHF_ALLOC) == 0 ||
> + (Sec->Flags & SHF_TLS);
> + });
> +}
> +
> // The entry point address is chosen in the following ways.
> //
> // 1. the '-e' entry command-line option;
> Index: ELF/Relocations.cpp
> ===================================================================
> --- ELF/Relocations.cpp
> +++ ELF/Relocations.cpp
> @@ -547,13 +547,6 @@
> In<ELFT>::RelaDyn->addReloc({Target->CopyRel, Sec, 0, false, SS, 0});
> }
>
> -static void errorOrWarn(const Twine &Msg) {
> - if (!Config->NoinhibitExec)
> - error(Msg);
> - else
> - warn(Msg);
> -}
> -
> template <class ELFT>
> static RelExpr adjustExpr(Symbol &Sym, RelExpr Expr, RelType Type,
> InputSectionBase &S, uint64_t RelOff) {
> Index: ELF/Config.h
> ===================================================================
> --- ELF/Config.h
> +++ ELF/Config.h
> @@ -10,6 +10,7 @@
> #ifndef LLD_ELF_CONFIG_H
> #define LLD_ELF_CONFIG_H
>
> +#include "lld/Common/ErrorHandler.h"
> #include "llvm/ADT/MapVector.h"
> #include "llvm/ADT/StringRef.h"
> #include "llvm/ADT/StringSet.h"
> @@ -236,6 +237,12 @@
> // The only instance of Configuration struct.
> extern Configuration *Config;
>
> +static inline void errorOrWarn(const Twine &Msg) {
> + if (!Config->NoinhibitExec)
> + error(Msg);
> + else
> + warn(Msg);
> +}
> } // namespace elf
> } // namespace lld
>
More information about the llvm-commits
mailing list