[lld] r270999 - Avoid doing binary search.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 07:39:18 PDT 2016


Author: ruiu
Date: Fri May 27 09:39:13 2016
New Revision: 270999

URL: http://llvm.org/viewvc/llvm-project?rev=270999&view=rev
Log:
Avoid doing binary search.

MergedInputSection::getOffset is the busiest function in LLD if string
merging is enabled and input files have lots of mergeable sections.
This is usually the case when creating an executable with debug info,
so it is pretty common.

The reason it is slow is that it has to do fairly complex
computations. For non-mergeable sections, section contents are
contiguous in output, so in order to compute an output offset,
we only have to add the output section's base address to an input
offset. But for mergeable strings, section contents are split for
merging, so they are not contiguous. We've got to do some lookups.

We used to do binary search on the list of section pieces.
I think it is slow because it's hostile to branch prediction.

This patch replaces it with a hash table lookup. It seems to be working
pretty well. Below is "perf stat -r10" output when linking clang
with debug info. In this case this patch speeds up linking by about 4%.

Before:

       6584.153205 task-clock (msec)         #    1.001 CPUs utilized            ( +-  0.09% )
               238 context-switches          #    0.036 K/sec                    ( +-  6.59% )
                 0 cpu-migrations            #    0.000 K/sec                    ( +- 50.92% )
         1,067,675 page-faults               #    0.162 M/sec                    ( +-  0.15% )
    18,369,931,470 cycles                    #    2.790 GHz                      ( +-  0.09% )
     9,640,680,143 stalled-cycles-frontend   #   52.48% frontend cycles idle     ( +-  0.18% )
   <not supported> stalled-cycles-backend
    21,206,747,787 instructions              #    1.15  insns per cycle
                                             #    0.45  stalled cycles per insn  ( +-  0.04% )
     3,817,398,032 branches                  #  579.786 M/sec                    ( +-  0.04% )
       132,787,249 branch-misses             #    3.48% of all branches          ( +-  0.02% )

       6.579106511 seconds time elapsed                                          ( +-  0.09% )

After:

       6312.317533 task-clock (msec)         #    1.001 CPUs utilized            ( +-  0.19% )
               221 context-switches          #    0.035 K/sec                    ( +-  4.11% )
                 1 cpu-migrations            #    0.000 K/sec                    ( +- 45.21% )
         1,280,775 page-faults               #    0.203 M/sec                    ( +-  0.37% )
    17,611,539,150 cycles                    #    2.790 GHz                      ( +-  0.19% )
    10,285,148,569 stalled-cycles-frontend   #   58.40% frontend cycles idle     ( +-  0.30% )
   <not supported> stalled-cycles-backend
    18,794,779,900 instructions              #    1.07  insns per cycle
                                             #    0.55  stalled cycles per insn  ( +-  0.03% )
     3,287,450,865 branches                  #  520.799 M/sec                    ( +-  0.03% )
        72,259,605 branch-misses             #    2.20% of all branches          ( +-  0.01% )

       6.307411828 seconds time elapsed                                          ( +-  0.19% )

Differential Revision: http://reviews.llvm.org/D20645

Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/InputSection.h
    lld/trunk/ELF/OutputSections.cpp
    lld/trunk/ELF/OutputSections.h
    lld/trunk/ELF/Writer.cpp

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=270999&r1=270998&r2=270999&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Fri May 27 09:39:13 2016
@@ -513,6 +513,7 @@ bool MergeInputSection<ELFT>::classof(co
   return S->SectionKind == InputSectionBase<ELFT>::Merge;
 }
 
+// Do binary search to get a section piece at a given input offset.
 template <class ELFT>
 SectionPiece *SplitInputSection<ELFT>::getSectionPiece(uintX_t Offset) {
   ArrayRef<uint8_t> D = this->getSectionData();
@@ -529,23 +530,40 @@ SectionPiece *SplitInputSection<ELFT>::g
   return &*I;
 }
 
+// Returns the offset in an output section for a given input offset.
+// Because contents of a mergeable section is not contiguous in output,
+// it is not just an addition to a base output offset.
 template <class ELFT>
 typename ELFT::uint MergeInputSection<ELFT>::getOffset(uintX_t Offset) {
+  auto It = OffsetMap.find(Offset);
+  if (It != OffsetMap.end())
+    return It->second;
+
+  // If Offset is not at beginning of a section piece, it is not in the map.
+  // In that case we need to search from the original section piece vector.
   SectionPiece &Piece = *this->getSectionPiece(Offset);
   assert(Piece.Live);
-
-  // Compute the Addend and if the Base is cached, return.
   uintX_t Addend = Offset - Piece.InputOff;
-  if (Piece.OutputOff != size_t(-1))
-    return Piece.OutputOff + Addend;
+  uintX_t Ret = Piece.OutputOff + Addend;
+  return Ret;
+}
 
-  // Map the base to the offset in the output section and cache it.
-  ArrayRef<uint8_t> D = this->getSectionData();
-  StringRef Data((const char *)D.data(), D.size());
-  StringRef Entry = Data.substr(Piece.InputOff, Piece.size());
-  auto *MOS = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
-  Piece.OutputOff = MOS->getOffset(Entry);
-  return Piece.OutputOff + Addend;
+// Create a map from input offsets to output offsets for all section pieces.
+// It is called after finalize().
+template <class ELFT> void  MergeInputSection<ELFT>::finalizePieces() {
+  OffsetMap.grow(this->Pieces.size());
+  for (SectionPiece &Piece : this->Pieces) {
+    if (!Piece.Live)
+      continue;
+    if (Piece.OutputOff == size_t(-1)) {
+      // Offsets of tail-merged strings are computed lazily.
+      auto *OutSec = static_cast<MergeOutputSection<ELFT> *>(this->OutSec);
+      ArrayRef<uint8_t> D = Piece.data();
+      StringRef S((const char *)D.data(), D.size());
+      Piece.OutputOff = OutSec->getOffset(S);
+    }
+    OffsetMap[Piece.InputOff] = Piece.OutputOff;
+  }
 }
 
 template <class ELFT>

Modified: lld/trunk/ELF/InputSection.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=270999&r1=270998&r2=270999&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.h (original)
+++ lld/trunk/ELF/InputSection.h Fri May 27 09:39:13 2016
@@ -145,7 +145,10 @@ public:
   // in the output section.
   uintX_t getOffset(uintX_t Offset);
 
+  void finalizePieces();
+
 private:
+  llvm::DenseMap<uintX_t, uintX_t> OffsetMap;
   llvm::DenseSet<uintX_t> LiveOffsets;
 };
 

Modified: lld/trunk/ELF/OutputSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.cpp?rev=270999&r1=270998&r2=270999&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.cpp (original)
+++ lld/trunk/ELF/OutputSections.cpp Fri May 27 09:39:13 2016
@@ -1164,6 +1164,7 @@ void MergeOutputSection<ELFT>::addSectio
   Sec->OutSec = this;
   this->updateAlign(Sec->Align);
   this->Header.sh_entsize = Sec->getSectionHdr()->sh_entsize;
+  Sections.push_back(Sec);
 
   bool IsString = this->Header.sh_flags & SHF_STRINGS;
 
@@ -1191,6 +1192,11 @@ template <class ELFT> void MergeOutputSe
   this->Header.sh_size = Builder.getSize();
 }
 
+template <class ELFT> void MergeOutputSection<ELFT>::finalizePieces() {
+  for (MergeInputSection<ELFT> *Sec : Sections)
+    Sec->finalizePieces();
+}
+
 template <class ELFT>
 StringTableSection<ELFT>::StringTableSection(StringRef Name, bool Dynamic)
     : OutputSectionBase<ELFT>(Name, SHT_STRTAB,

Modified: lld/trunk/ELF/OutputSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/OutputSections.h?rev=270999&r1=270998&r2=270999&view=diff
==============================================================================
--- lld/trunk/ELF/OutputSections.h (original)
+++ lld/trunk/ELF/OutputSections.h Fri May 27 09:39:13 2016
@@ -93,6 +93,7 @@ public:
   bool PageAlign = false;
 
   virtual void finalize() {}
+  virtual void finalizePieces() {}
   virtual void
   forEachInputSection(std::function<void(InputSectionBase<ELFT> *)> F) {}
   virtual void writeTo(uint8_t *Buf) {}
@@ -320,10 +321,12 @@ public:
   void writeTo(uint8_t *Buf) override;
   unsigned getOffset(StringRef Val);
   void finalize() override;
+  void finalizePieces() override;
   bool shouldTailMerge() const;
 
 private:
   llvm::StringTableBuilder Builder;
+  std::vector<MergeInputSection<ELFT> *> Sections;
 };
 
 struct CieRecord {

Modified: lld/trunk/ELF/Writer.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Writer.cpp?rev=270999&r1=270998&r2=270999&view=diff
==============================================================================
--- lld/trunk/ELF/Writer.cpp (original)
+++ lld/trunk/ELF/Writer.cpp Fri May 27 09:39:13 2016
@@ -873,6 +873,11 @@ template <class ELFT> void Writer<ELFT>:
 
   if (isOutputDynamic())
     Out<ELFT>::Dynamic->finalize();
+
+  // Now that all output offsets are fixed. Finalize mergeable sections
+  // to fix their maps from input offsets to output offsets.
+  for (OutputSectionBase<ELFT> *Sec : OutputSections)
+    Sec->finalizePieces();
 }
 
 template <class ELFT> bool Writer<ELFT>::needsGot() {




More information about the llvm-commits mailing list