[lld] r302903 - Optimize orphan placement in a general way.

Rafael Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri May 12 07:52:23 PDT 2017


Author: rafael
Date: Fri May 12 09:52:22 2017
New Revision: 302903

URL: http://llvm.org/viewvc/llvm-project?rev=302903&view=rev
Log:
Optimize orphan placement in a general way.

We used to place orphans by just using compareSectionsNonScript.

Then we noticed that since linker scripts can use another order, we
should first try match the section to a given PT_LOAD. But there is
nothing special about PT_LOAD. The same issue can show up for
PT_GNU_RELRO for example.

In general, we have to search for the most similar section and put the
orphan next to it. Most similar being defined as how long they follow
the same code path in compareSecitonsNonScript.

That is what this patch does. We now compute a rank for each output
section, with a bit for each branch in what was
compareSectionsNonScript.

With this findOrphanPos is now fully general and orphan placement can
be optimized by placing every section with the same rank at once.

The included testcase is a variation of many-sections.s that uses
allocatable sections to avoid the fast path in the existing
code. Without threads it goes form 46 seconds to 0.9 seconds.

Added:
    lld/trunk/test/ELF/many-alloc-sections.s
Modified:
    lld/trunk/ELF/OutputSections.h
    lld/trunk/ELF/Writer.cpp
    lld/trunk/test/ELF/linkerscript/sections-constraint.s
    lld/trunk/test/ELF/linkerscript/sections.s

Modified: lld/trunk/ELF/OutputSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.h?rev=302903&r1=302902&r2=302903&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.h (original)
+++ lld/trunk/ELF/OutputSections.h Fri May 12 09:52:22 2017
@@ -50,6 +50,7 @@ public:
   template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *SHdr);
 
   unsigned SectionIndex;
+  unsigned SortRank;
 
   uint32_t getPhdrFlags() const;
 

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=302903&r1=302902&r2=302903&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Fri May 12 09:52:22 2017
@@ -546,32 +546,6 @@ template <class ELFT> void Writer<ELFT>:
   }
 }
 
-// PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that
-// we would like to make sure appear is a specific order to maximize their
-// coverage by a single signed 16-bit offset from the TOC base pointer.
-// Conversely, the special .tocbss section should be first among all SHT_NOBITS
-// sections. This will put it next to the loaded special PPC64 sections (and,
-// thus, within reach of the TOC base pointer).
-static int getPPC64SectionRank(StringRef SectionName) {
-  return StringSwitch<int>(SectionName)
-      .Case(".tocbss", 0)
-      .Case(".branch_lt", 2)
-      .Case(".toc", 3)
-      .Case(".toc1", 4)
-      .Case(".opd", 5)
-      .Default(1);
-}
-
-// All sections with SHF_MIPS_GPREL flag should be grouped together
-// because data in these sections is addressable with a gp relative address.
-static int getMipsSectionRank(const OutputSection *S) {
-  if ((S->Flags & SHF_MIPS_GPREL) == 0)
-    return 0;
-  if (S->Name == ".got")
-    return 1;
-  return 2;
-}
-
 // Today's loaders have a feature to make segments read-only after
 // processing dynamic relocations to enhance security. PT_GNU_RELRO
 // is defined for that.
@@ -645,99 +619,145 @@ bool elf::isRelroSection(const OutputSec
          S == ".eh_frame" || S == ".openbsd.randomdata";
 }
 
-static bool compareSectionsNonScript(const OutputSection *A,
-                                     const OutputSection *B) {
+// We compute a rank for each section. The rank indicates where the
+// section should be placed in the file.  Instead of using simple
+// numbers (0,1,2...), we use a series of flags. One for each decision
+// point when placing the section.
+// Using flags has two key properties:
+// * It is easy to check if a give branch was taken.
+// * It is easy two see how similar two ranks are (see getRankProximity).
+enum RankFlags {
+  RF_NOT_ADDR_SET = 1 << 16,
+  RF_NOT_INTERP = 1 << 15,
+  RF_NOT_ALLOC = 1 << 14,
+  RF_WRITE = 1 << 13,
+  RF_EXEC = 1 << 12,
+  RF_NON_TLS_BSS = 1 << 11,
+  RF_NON_TLS_BSS_RO = 1 << 10,
+  RF_NOT_TLS = 1 << 9,
+  RF_BSS = 1 << 8,
+  RF_PPC_NOT_TOCBSS = 1 << 7,
+  RF_PPC_OPD = 1 << 6,
+  RF_PPC_TOCL = 1 << 5,
+  RF_PPC_TOC = 1 << 4,
+  RF_PPC_BRANCH_LT = 1 << 3,
+  RF_MIPS_GPREL = 1 << 2,
+  RF_MIPS_NOT_GOT = 1 << 1
+};
+
+static unsigned getSectionRank(const OutputSection *Sec) {
+  unsigned Rank = 0;
+
+  // We want to put section specified by -T option first, so we
+  // can start assigning VA starting from them later.
+  if (Config->SectionStartMap.count(Sec->Name))
+    return Rank;
+  Rank |= RF_NOT_ADDR_SET;
+
   // Put .interp first because some loaders want to see that section
   // on the first page of the executable file when loaded into memory.
-  bool AIsInterp = A->Name == ".interp";
-  bool BIsInterp = B->Name == ".interp";
-  if (AIsInterp != BIsInterp)
-    return AIsInterp;
+  if (Sec->Name == ".interp")
+    return Rank;
+  Rank |= RF_NOT_INTERP;
 
   // Allocatable sections go first to reduce the total PT_LOAD size and
   // so debug info doesn't change addresses in actual code.
-  bool AIsAlloc = A->Flags & SHF_ALLOC;
-  bool BIsAlloc = B->Flags & SHF_ALLOC;
-  if (AIsAlloc != BIsAlloc)
-    return AIsAlloc;
-
-  // We don't have any special requirements for the relative order of two non
-  // allocatable sections.
-  if (!AIsAlloc)
-    return false;
-
-  // We want to put section specified by -T option first, so we
-  // can start assigning VA starting from them later.
-  auto AAddrSetI = Config->SectionStartMap.find(A->Name);
-  auto BAddrSetI = Config->SectionStartMap.find(B->Name);
-  bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end();
-  bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end();
-  if (AHasAddrSet != BHasAddrSet)
-    return AHasAddrSet;
-  if (AHasAddrSet)
-    return AAddrSetI->second < BAddrSetI->second;
+  if (!(Sec->Flags & SHF_ALLOC))
+    return Rank | RF_NOT_ALLOC;
 
   // We want the read only sections first so that they go in the PT_LOAD
   // covering the program headers at the start of the file.
-  bool AIsWritable = A->Flags & SHF_WRITE;
-  bool BIsWritable = B->Flags & SHF_WRITE;
-  if (AIsWritable != BIsWritable)
-    return BIsWritable;
+  if (Sec->Flags & SHF_WRITE)
+    Rank |= RF_WRITE;
 
-  if (!Config->SingleRoRx) {
+  if (Sec->Flags & SHF_EXECINSTR) {
     // For a corresponding reason, put non exec sections first (the program
     // header PT_LOAD is not executable).
     // We only do that if we are not using linker scripts, since with linker
     // scripts ro and rx sections are in the same PT_LOAD, so their relative
     // order is not important. The same applies for -no-rosegment.
-    bool AIsExec = A->Flags & SHF_EXECINSTR;
-    bool BIsExec = B->Flags & SHF_EXECINSTR;
-    if (AIsExec != BIsExec)
-      return BIsExec;
+    if ((Rank & RF_WRITE) || !Config->SingleRoRx)
+      Rank |= RF_EXEC;
   }
 
   // If we got here we know that both A and B are in the same PT_LOAD.
 
-  bool AIsTls = A->Flags & SHF_TLS;
-  bool BIsTls = B->Flags & SHF_TLS;
-  bool AIsNoBits = A->Type == SHT_NOBITS;
-  bool BIsNoBits = B->Type == SHT_NOBITS;
+  bool IsTls = Sec->Flags & SHF_TLS;
+  bool IsNoBits = Sec->Type == SHT_NOBITS;
 
   // The first requirement we have is to put (non-TLS) nobits sections last. The
   // reason is that the only thing the dynamic linker will see about them is a
   // p_memsz that is larger than p_filesz. Seeing that it zeros the end of the
   // PT_LOAD, so that has to correspond to the nobits sections.
-  bool AIsNonTlsNoBits = AIsNoBits && !AIsTls;
-  bool BIsNonTlsNoBits = BIsNoBits && !BIsTls;
-  if (AIsNonTlsNoBits != BIsNonTlsNoBits)
-    return BIsNonTlsNoBits;
+  bool IsNonTlsNoBits = IsNoBits && !IsTls;
+  if (IsNonTlsNoBits)
+    Rank |= RF_NON_TLS_BSS;
 
   // We place nobits RelRo sections before plain r/w ones, and non-nobits RelRo
   // sections after r/w ones, so that the RelRo sections are contiguous.
-  bool AIsRelRo = isRelroSection(A);
-  bool BIsRelRo = isRelroSection(B);
-  if (AIsRelRo != BIsRelRo)
-    return AIsNonTlsNoBits ? AIsRelRo : BIsRelRo;
+  bool IsRelRo = isRelroSection(Sec);
+  if (IsNonTlsNoBits && !IsRelRo)
+    Rank |= RF_NON_TLS_BSS_RO;
+  if (!IsNonTlsNoBits && IsRelRo)
+    Rank |= RF_NON_TLS_BSS_RO;
 
   // The TLS initialization block needs to be a single contiguous block in a R/W
   // PT_LOAD, so stick TLS sections directly before the other RelRo R/W
   // sections. The TLS NOBITS sections are placed here as they don't take up
   // virtual address space in the PT_LOAD.
-  if (AIsTls != BIsTls)
-    return AIsTls;
+  if (!IsTls)
+    Rank |= RF_NOT_TLS;
 
   // Within the TLS initialization block, the non-nobits sections need to appear
   // first.
-  if (AIsNoBits != BIsNoBits)
-    return BIsNoBits;
+  if (IsNoBits)
+    Rank |= RF_BSS;
+
+  // // Some architectures have additional ordering restrictions for sections
+  // // within the same PT_LOAD.
+  if (Config->EMachine == EM_PPC64) {
+    // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections
+    // that we would like to make sure appear is a specific order to maximize
+    // their coverage by a single signed 16-bit offset from the TOC base
+    // pointer. Conversely, the special .tocbss section should be first among
+    // all SHT_NOBITS sections. This will put it next to the loaded special
+    // PPC64 sections (and, thus, within reach of the TOC base pointer).
+    StringRef Name = Sec->Name;
+    if (Name != ".tocbss")
+      Rank |= RF_PPC_NOT_TOCBSS;
+
+    if (Name == ".opd")
+      Rank |= RF_PPC_OPD;
+
+    if (Name == ".toc1")
+      Rank |= RF_PPC_TOCL;
+
+    if (Name == ".toc")
+      Rank |= RF_PPC_TOC;
+
+    if (Name == ".branch_lt")
+      Rank |= RF_PPC_BRANCH_LT;
+  }
+  if (Config->EMachine == EM_MIPS) {
+    // All sections with SHF_MIPS_GPREL flag should be grouped together
+    // because data in these sections is addressable with a gp relative address.
+    if (Sec->Flags & SHF_MIPS_GPREL)
+      Rank |= RF_MIPS_GPREL;
 
-  // Some architectures have additional ordering restrictions for sections
-  // within the same PT_LOAD.
-  if (Config->EMachine == EM_PPC64)
-    return getPPC64SectionRank(A->Name) < getPPC64SectionRank(B->Name);
-  if (Config->EMachine == EM_MIPS)
-    return getMipsSectionRank(A) < getMipsSectionRank(B);
+    if (Sec->Name != ".got")
+      Rank |= RF_MIPS_NOT_GOT;
+  }
+
+  return Rank;
+}
 
+static bool compareSectionsNonScript(const OutputSection *A,
+                                     const OutputSection *B) {
+  if (A->SortRank != B->SortRank)
+    return A->SortRank < B->SortRank;
+  if (!(A->SortRank & RF_NOT_ADDR_SET))
+    return Config->SectionStartMap.lookup(A->Name) <
+           Config->SectionStartMap.lookup(B->Name);
   return false;
 }
 
@@ -751,8 +771,7 @@ static bool compareSections(const Output
   if (AIndex != BIndex)
     return AIndex < BIndex;
 
-  // The sections are not in the linker script, so don't sort for now.
-  return false;
+  return compareSectionsNonScript(A, B);
 }
 
 // Program header entry
@@ -962,45 +981,36 @@ template <class ELFT> void Writer<ELFT>:
     Sec->assignOffsets();
 }
 
-static bool canSharePtLoad(const OutputSection &S1, const OutputSection &S2) {
-  if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC))
-    return false;
-
-  bool S1IsWrite = S1.Flags & SHF_WRITE;
-  bool S2IsWrite = S2.Flags & SHF_WRITE;
-  if (S1IsWrite != S2IsWrite)
-    return false;
-
-  if (!S1IsWrite)
-    return true; // RO and RX share a PT_LOAD with linker scripts.
-  return (S1.Flags & SHF_EXECINSTR) == (S2.Flags & SHF_EXECINSTR);
+// We want to find how similar two ranks are.
+// The more branches in getSectionRank that match, the more similar they are.
+// Since each branch corresponds to a bit flag, we can just use
+// countLeadingZeros.
+static unsigned getRankProximity(OutputSection *A, OutputSection *B) {
+  return countLeadingZeros(A->SortRank ^ B->SortRank);
 }
 
-// We assume, like createPhdrs that all allocs are at the start.
+// We want to place orphan sections so that they share as much
+// characteristics with their neighbors as possible. For example, if
+// both are rw, or both are tls.
 template <typename ELFT>
 static std::vector<OutputSection *>::iterator
 findOrphanPos(std::vector<OutputSection *>::iterator B,
               std::vector<OutputSection *>::iterator E) {
   OutputSection *Sec = *E;
 
-  // If it is not allocatable, just leave it at the end.
-  if (!(Sec->Flags & SHF_ALLOC))
+  // Find the first element that has as close a rank as possible.
+  auto I = std::max_element(B, E, [=](OutputSection *A, OutputSection *B) {
+    return getRankProximity(Sec, A) < getRankProximity(Sec, B);
+  });
+  if (I == E)
     return E;
 
-  // Find the first sharable.
-  auto Pos = std::find_if(
-      B, E, [=](OutputSection *S) { return canSharePtLoad(*S, *Sec); });
-  if (Pos != E) {
-    // Ony consider the sharable range.
-    B = Pos;
-    E = std::find_if(
-        B, E, [=](OutputSection *S) { return !canSharePtLoad(*S, *Sec); });
-    assert(B != E);
-  }
-
-  // Find the fist position that Sec compares less to.
-  return std::find_if(
-      B, E, [=](OutputSection *S) { return compareSectionsNonScript(Sec, S); });
+  // Consider all existing sections with the same proximity.
+  unsigned Proximity = getRankProximity(Sec, *I);
+  while (I != E && getRankProximity(Sec, *I) == Proximity &&
+         Sec->SortRank >= (*I)->SortRank)
+    ++I;
+  return I;
 }
 
 template <class ELFT> void Writer<ELFT>::sortSections() {
@@ -1008,12 +1018,18 @@ template <class ELFT> void Writer<ELFT>:
   // relative order for SHF_LINK_ORDER sections.
   if (Config->Relocatable)
     return;
+
+  if (Script->Opt.HasSections)
+    Script->adjustSectionsBeforeSorting();
+
+  for (OutputSection *Sec : OutputSections)
+    Sec->SortRank = getSectionRank(Sec);
+
   if (!Script->Opt.HasSections) {
     std::stable_sort(OutputSections.begin(), OutputSections.end(),
                      compareSectionsNonScript);
     return;
   }
-  Script->adjustSectionsBeforeSorting();
 
   // The order of the sections in the script is arbitrary and may not agree with
   // compareSectionsNonScript. This means that we cannot easily define a
@@ -1046,8 +1062,18 @@ template <class ELFT> void Writer<ELFT>:
   auto NonScriptI =
       std::find_if(OutputSections.begin(), E,
                    [](OutputSection *S) { return S->SectionIndex == INT_MAX; });
-  for (; NonScriptI != E; ++NonScriptI)
-    std::rotate(findOrphanPos<ELFT>(I, NonScriptI), NonScriptI, NonScriptI + 1);
+  while (NonScriptI != E) {
+    auto Pos = findOrphanPos<ELFT>(I, NonScriptI);
+
+    // As an optimization, find all sections with the same sort rank
+    // and insert them with one rotate.
+    unsigned Rank = (*NonScriptI)->SortRank;
+    auto End = std::find_if(NonScriptI + 1, E, [=](OutputSection *Sec) {
+      return Sec->SortRank != Rank;
+    });
+    std::rotate(Pos, NonScriptI, End);
+    NonScriptI = End;
+  }
 
   Script->adjustSectionsAfterSorting();
 }

Modified: lld/trunk/test/ELF/linkerscript/sections-constraint.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/sections-constraint.s?rev=302903&r1=302902&r2=302903&view=diff
==============================================================================
--- lld/trunk/test/ELF/linkerscript/sections-constraint.s (original)
+++ lld/trunk/test/ELF/linkerscript/sections-constraint.s Fri May 12 09:52:22 2017
@@ -24,8 +24,8 @@
 # NO1-NEXT: 0               00000000
 # NO1:  .writable     00000004
 # NO1:  .foo.2        00000004
-# NO1:  .readable     00000004
 # NO1:  .foo.1        00000004
+# NO1:  .readable     00000004
 
 .global _start
 _start:

Modified: lld/trunk/test/ELF/linkerscript/sections.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/linkerscript/sections.s?rev=302903&r1=302902&r2=302903&view=diff
==============================================================================
--- lld/trunk/test/ELF/linkerscript/sections.s (original)
+++ lld/trunk/test/ELF/linkerscript/sections.s Fri May 12 09:52:22 2017
@@ -45,8 +45,9 @@
 # SEC-ORDER: 3 .shstrtab     0000003b {{[0-9a-f]*}}
 # SEC-ORDER: 4 .symtab       00000030 {{[0-9a-f]*}}
 # SEC-ORDER: 5 .strtab       00000008 {{[0-9a-f]*}}
-# SEC-ORDER: 6 .data         00000020 {{[0-9a-f]*}} DATA
-# SEC-ORDER: 7 .text         0000000e {{[0-9a-f]*}} TEXT DATA
+# SEC-ORDER: 6 .comment      00000008 {{[0-9a-f]*}}
+# SEC-ORDER: 7 .data         00000020 {{[0-9a-f]*}} DATA
+# SEC-ORDER: 8 .text         0000000e {{[0-9a-f]*}} TEXT DATA
 
 # .text and .data have swapped names but proper sizes and types.
 # RUN: echo "SECTIONS { \

Added: lld/trunk/test/ELF/many-alloc-sections.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/many-alloc-sections.s?rev=302903&view=auto
==============================================================================
--- lld/trunk/test/ELF/many-alloc-sections.s (added)
+++ lld/trunk/test/ELF/many-alloc-sections.s Fri May 12 09:52:22 2017
@@ -0,0 +1,105 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o
+// RUN: echo "SECTIONS { . = SIZEOF_HEADERS; .text : { *(.text) } }" > %t.script
+// RUN: ld.lld -T %t.script %t.o -o %t
+// RUN: llvm-readobj -t %t | FileCheck %s
+
+// Test that _start is in the correct section.
+// CHECK:      Name: _start
+// CHECK-NEXT: Value: 0x120
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: dm
+
+.macro gen_sections4 x
+        .section a\x,"a"
+        .section b\x,"a"
+        .section c\x,"a"
+        .section d\x,"a"
+.endm
+
+.macro gen_sections8 x
+        gen_sections4 a\x
+        gen_sections4 b\x
+.endm
+
+.macro gen_sections16 x
+        gen_sections8 a\x
+        gen_sections8 b\x
+.endm
+
+.macro gen_sections32 x
+        gen_sections16 a\x
+        gen_sections16 b\x
+.endm
+
+.macro gen_sections64 x
+        gen_sections32 a\x
+        gen_sections32 b\x
+.endm
+
+.macro gen_sections128 x
+        gen_sections64 a\x
+        gen_sections64 b\x
+.endm
+
+.macro gen_sections256 x
+        gen_sections128 a\x
+        gen_sections128 b\x
+.endm
+
+.macro gen_sections512 x
+        gen_sections256 a\x
+        gen_sections256 b\x
+.endm
+
+.macro gen_sections1024 x
+        gen_sections512 a\x
+        gen_sections512 b\x
+.endm
+
+.macro gen_sections2048 x
+        gen_sections1024 a\x
+        gen_sections1024 b\x
+.endm
+
+.macro gen_sections4096 x
+        gen_sections2048 a\x
+        gen_sections2048 b\x
+.endm
+
+.macro gen_sections8192 x
+        gen_sections4096 a\x
+        gen_sections4096 b\x
+.endm
+
+.macro gen_sections16384 x
+        gen_sections8192 a\x
+        gen_sections8192 b\x
+.endm
+
+.macro gen_sections32768 x
+        gen_sections16384 a\x
+        gen_sections16384 b\x
+.endm
+
+        .bss
+        .section bar
+
+gen_sections32768 a
+gen_sections16384 b
+gen_sections8192 c
+gen_sections4096 d
+gen_sections2048 e
+gen_sections1024 f
+gen_sections512 g
+gen_sections128 h
+gen_sections64 i
+gen_sections32 j
+gen_sections16 k
+gen_sections8 l
+gen_sections4 m
+
+.global _start
+_start:




More information about the llvm-commits mailing list