[lld] r316280 - Assume that mergeable input sections are smaller than 4 GiB.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 21 16:20:13 PDT 2017


Author: ruiu
Date: Sat Oct 21 16:20:13 2017
New Revision: 316280

URL: http://llvm.org/viewvc/llvm-project?rev=316280&view=rev
Log:
Assume that mergeable input sections are smaller than 4 GiB.

By assuming that mergeable input sections are smaller than 4 GiB,
lld's memory usage when linking clang with debug info drops from
2.788 GiB to 2.019 GiB (measured by valgrind, and that does not include
memory space for mmap'ed files). I think that's a reasonable assumption
given such a large RAM saving, hence this patch.

According to valgrind, gold needs 3.54 GiB of RAM to do the same thing.

NB: This patch does not introduce a limitation on the size of
output sections. You can still create sections larger than 4 GiB.

Modified:
    lld/trunk/ELF/InputSection.cpp
    lld/trunk/ELF/InputSection.h
    lld/trunk/ELF/SyntheticSections.cpp

Modified: lld/trunk/ELF/InputSection.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.cpp?rev=316280&r1=316279&r2=316280&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.cpp (original)
+++ lld/trunk/ELF/InputSection.cpp Sat Oct 21 16:20:13 2017
@@ -871,13 +871,15 @@ SyntheticSection *MergeInputSection::get
 void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) {
   size_t Off = 0;
   bool IsAlloc = this->Flags & SHF_ALLOC;
+
   while (!Data.empty()) {
     size_t End = findNull(Data, EntSize);
     if (End == StringRef::npos)
       fatal(toString(this) + ": string is not null terminated");
     size_t Size = End + EntSize;
-    Pieces.emplace_back(Off, !IsAlloc);
-    Hashes.push_back(xxHash64(toStringRef(Data.slice(0, Size))));
+
+    Pieces.emplace_back(Off, xxHash64(toStringRef(Data.slice(0, Size))),
+                        !IsAlloc);
     Data = Data.slice(Size);
     Off += Size;
   }
@@ -890,17 +892,23 @@ void MergeInputSection::splitNonStrings(
   size_t Size = Data.size();
   assert((Size % EntSize) == 0);
   bool IsAlloc = this->Flags & SHF_ALLOC;
-  for (unsigned I = 0, N = Size; I != N; I += EntSize) {
-    Hashes.push_back(xxHash64(toStringRef(Data.slice(I, EntSize))));
-    Pieces.emplace_back(I, !IsAlloc);
-  }
+
+  for (size_t I = 0; I != Size; I += EntSize)
+    Pieces.emplace_back(I, xxHash64(toStringRef(Data.slice(I, EntSize))),
+                        !IsAlloc);
 }
 
 template <class ELFT>
 MergeInputSection::MergeInputSection(ObjFile<ELFT> *F,
                                      const typename ELFT::Shdr *Header,
                                      StringRef Name)
-    : InputSectionBase(F, Header, Name, InputSectionBase::Merge) {}
+    : InputSectionBase(F, Header, Name, InputSectionBase::Merge) {
+  // In order to reduce memory allocation, we assume that mergeable
+  // sections are smaller than 4 GiB, which is not an unreasonable
+  // assumption as of 2017.
+  if (Data.size() > UINT32_MAX)
+    error(toString(this) + ": section too large");
+}
 
 // This function is called after we obtain a complete list of input sections
 // that need to be linked. This is responsible to split section contents
@@ -942,8 +950,7 @@ static It fastUpperBound(It First, It La
 }
 
 const SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) const {
-  uint64_t Size = this->Data.size();
-  if (Offset >= Size)
+  if (Data.size() <= Offset)
     fatal(toString(this) + ": entry is past the end of the section");
 
   // Find the element this offset points to.
@@ -958,20 +965,20 @@ const SectionPiece *MergeInputSection::g
 // Because contents of a mergeable section is not contiguous in output,
 // it is not just an addition to a base output offset.
 uint64_t MergeInputSection::getOffset(uint64_t Offset) const {
+  if (!this->Live)
+    return 0;
+
   // Initialize OffsetMap lazily.
   llvm::call_once(InitOffsetMap, [&] {
     OffsetMap.reserve(Pieces.size());
-    for (const SectionPiece &Piece : Pieces)
-      OffsetMap[Piece.InputOff] = Piece.OutputOff;
+    for (size_t I = 0; I < Pieces.size(); ++I)
+      OffsetMap[Pieces[I].InputOff] = I;
   });
 
   // Find a string starting at a given offset.
   auto It = OffsetMap.find(Offset);
   if (It != OffsetMap.end())
-    return It->second;
-
-  if (!this->Live)
-    return 0;
+    return Pieces[It->second].OutputOff;
 
   // If Offset is not at beginning of a section piece, it is not in the map.
   // In that case we need to search from the original section piece vector.

Modified: lld/trunk/ELF/InputSection.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputSection.h?rev=316280&r1=316279&r2=316280&view=diff
==============================================================================
--- lld/trunk/ELF/InputSection.h (original)
+++ lld/trunk/ELF/InputSection.h Sat Oct 21 16:20:13 2017
@@ -214,15 +214,17 @@ private:
 // have to be as compact as possible, which is why we don't store the size (can
 // be found by looking at the next one) and put the hash in a side table.
 struct SectionPiece {
-  SectionPiece(size_t Off, bool Live)
-      : InputOff(Off), Live(Live || !Config->GcSections), OutputOff(-1) {}
-
-  size_t InputOff : 8 * sizeof(ssize_t) - 1;
-  size_t Live : 1;
-  ssize_t OutputOff;
+  SectionPiece(size_t Off, uint32_t Hash, bool Live)
+      : InputOff(Off), Hash(Hash), OutputOff(-1),
+        Live(Live || !Config->GcSections) {}
+
+  uint32_t InputOff;
+  uint32_t Hash;
+  uint64_t OutputOff : 63;
+  uint64_t Live : 1;
 };
-static_assert(sizeof(SectionPiece) == 2 * sizeof(size_t),
-              "SectionPiece is too big");
+
+static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
 
 // This corresponds to a SHF_MERGE section of an input file.
 class MergeInputSection : public InputSectionBase {
@@ -252,14 +254,9 @@ public:
   LLVM_ATTRIBUTE_ALWAYS_INLINE
   llvm::CachedHashStringRef getData(size_t I) const {
     size_t Begin = Pieces[I].InputOff;
-    size_t End;
-    if (Pieces.size() - 1 == I)
-      End = this->Data.size();
-    else
-      End = Pieces[I + 1].InputOff;
-
-    StringRef S = {(const char *)(this->Data.data() + Begin), End - Begin};
-    return {S, Hashes[I]};
+    size_t End =
+        (Pieces.size() - 1 == I) ? Data.size() : Pieces[I + 1].InputOff;
+    return {toStringRef(Data.slice(Begin, End - Begin)), Pieces[I].Hash};
   }
 
   // Returns the SectionPiece at a given input section offset.
@@ -272,9 +269,7 @@ private:
   void splitStrings(ArrayRef<uint8_t> A, size_t Size);
   void splitNonStrings(ArrayRef<uint8_t> A, size_t Size);
 
-  std::vector<uint32_t> Hashes;
-
-  mutable llvm::DenseMap<uint64_t, uint64_t> OffsetMap;
+  mutable llvm::DenseMap<uint32_t, uint32_t> OffsetMap;
   mutable llvm::once_flag InitOffsetMap;
 
   llvm::DenseSet<uint64_t> LiveOffsets;

Modified: lld/trunk/ELF/SyntheticSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=316280&r1=316279&r2=316280&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.cpp (original)
+++ lld/trunk/ELF/SyntheticSections.cpp Sat Oct 21 16:20:13 2017
@@ -2269,10 +2269,9 @@ void MergeNoTailSection::finalizeContent
       for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
         if (!Sec->Pieces[I].Live)
           continue;
-        CachedHashStringRef Str = Sec->getData(I);
-        size_t ShardId = getShardId(Str.hash());
+        size_t ShardId = getShardId(Sec->Pieces[I].Hash);
         if ((ShardId & (Concurrency - 1)) == ThreadId)
-          Sec->Pieces[I].OutputOff = Shards[ShardId].add(Str);
+          Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I));
       }
     }
   });
@@ -2294,7 +2293,7 @@ void MergeNoTailSection::finalizeContent
     for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
       if (Sec->Pieces[I].Live)
         Sec->Pieces[I].OutputOff +=
-            ShardOffsets[getShardId(Sec->getData(I).hash())];
+            ShardOffsets[getShardId(Sec->Pieces[I].Hash)];
   });
 }
 




More information about the llvm-commits mailing list