[lld] r314588 - Parallelize string merging.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 30 04:46:26 PDT 2017
Author: ruiu
Date: Sat Sep 30 04:46:26 2017
New Revision: 314588
URL: http://llvm.org/viewvc/llvm-project?rev=314588&view=rev
Log:
Parallelize string merging.
String merging is one of the most time-consuming functions in lld.
This patch parallelizes it to speed it up. On my 2-socket 20-core
40-thread Xeon E5-2680 @ 2.8 GHz machine, this patch shortens the
clang debug build link time from 7.11s to 5.16s. It's a 27%
improvement and actually pretty noticeable. In this test condition,
lld is now 4x faster than gold.
Differential Revision: https://reviews.llvm.org/D38266
Modified:
lld/trunk/ELF/SyntheticSections.cpp
lld/trunk/ELF/SyntheticSections.h
lld/trunk/test/ELF/comment-gc.s
lld/trunk/test/ELF/compressed-debug-input.s
lld/trunk/test/ELF/merge-string.s
lld/trunk/test/ELF/string-gc.s
Modified: lld/trunk/ELF/SyntheticSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=314588&r1=314587&r2=314588&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.cpp (original)
+++ lld/trunk/ELF/SyntheticSections.cpp Sat Sep 30 04:46:26 2017
@@ -37,6 +37,7 @@
#include "llvm/Support/SHA1.h"
#include "llvm/Support/xxhash.h"
#include <cstdlib>
+#include <thread>
using namespace llvm;
using namespace llvm::dwarf;
@@ -48,6 +49,8 @@ using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;
+const size_t MergeNoTailSection::NumShards;
+
uint64_t SyntheticSection::getVA() const {
if (OutputSection *Sec = getParent())
return Sec->Addr + OutSecOff;
@@ -2181,19 +2184,19 @@ template <class ELFT> bool VersionNeedSe
return getNeedNum() == 0;
}
-MergeSyntheticSection::MergeSyntheticSection(StringRef Name, uint32_t Type,
- uint64_t Flags, uint32_t Alignment)
- : SyntheticSection(Flags, Type, Alignment, Name),
- Builder(StringTableBuilder::RAW, Alignment) {}
-
void MergeSyntheticSection::addSection(MergeInputSection *MS) {
MS->Parent = this;
Sections.push_back(MS);
}
-size_t MergeSyntheticSection::getSize() const { return Builder.getSize(); }
+MergeTailSection::MergeTailSection(StringRef Name, uint32_t Type,
+ uint64_t Flags, uint32_t Alignment)
+ : MergeSyntheticSection(Name, Type, Flags, Alignment),
+ Builder(StringTableBuilder::RAW, Alignment) {}
+
+size_t MergeTailSection::getSize() const { return Builder.getSize(); }
-void MergeSyntheticSection::writeTo(uint8_t *Buf) { Builder.write(Buf); }
+void MergeTailSection::writeTo(uint8_t *Buf) { Builder.write(Buf); }
void MergeTailSection::finalizeContents() {
// Add all string pieces to the string table builder to create section
@@ -2215,17 +2218,63 @@ void MergeTailSection::finalizeContents(
Sec->Pieces[I].OutputOff = Builder.getOffset(Sec->getData(I));
}
+void MergeNoTailSection::writeTo(uint8_t *Buf) {
+ for (size_t I = 0; I < NumShards; ++I)
+ Shards[I].write(Buf + ShardOffsets[I]);
+}
+
+// This function is very hot (i.e. it can take several seconds to finish)
+// because the number of inputs is sometimes on the order of
+// millions. So, we use multi-threading.
+//
+// For any strings S and T, we know S is not mergeable with T if S's hash
+// value is different from T's. If that's the case, we can safely put S and
+// T into different string builders without worrying about merge misses.
+// We do it in parallel.
void MergeNoTailSection::finalizeContents() {
- // Add all string pieces to the string table builder to create section
- // contents. Because we are not tail-optimizing, offsets of strings are
- // fixed when they are added to the builder (string table builder contains
- // a hash table from strings to offsets).
- for (MergeInputSection *Sec : Sections)
+ // Initializes string table builders.
+ for (size_t I = 0; I < NumShards; ++I)
+ Shards.emplace_back(StringTableBuilder::RAW, Alignment);
+
+ // Concurrency level. Must be a power of 2.
+ size_t Concurrency = 1;
+ if (Config->Threads)
+ if (int N = std::thread::hardware_concurrency())
+ Concurrency = std::min(PowerOf2Floor(N), NumShards);
+
+ // Add section pieces to the builders.
+ parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
+ for (MergeInputSection *Sec : Sections) {
+ for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
+ if (!Sec->Pieces[I].Live)
+ continue;
+ CachedHashStringRef Str = Sec->getData(I);
+ size_t ShardId = getShardId(Str.hash());
+ if ((ShardId & (Concurrency - 1)) == ThreadId)
+ Sec->Pieces[I].OutputOff = Shards[ShardId].add(Str);
+ }
+ }
+ });
+
+ // Compute an in-section offset for each shard.
+ size_t Off = 0;
+ for (size_t I = 0; I < NumShards; ++I) {
+ Shards[I].finalizeInOrder();
+ if (Shards[I].getSize() > 0)
+ Off = alignTo(Off, Alignment);
+ ShardOffsets[I] = Off;
+ Off += Shards[I].getSize();
+ }
+ Size = Off;
+
+ // So far, section pieces have offsets from beginning of shards, but
+ // we want offsets from beginning of the whole section. Fix them.
+ parallelForEach(Sections, [&](MergeInputSection *Sec) {
for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I)
if (Sec->Pieces[I].Live)
- Sec->Pieces[I].OutputOff = Builder.add(Sec->getData(I));
-
- Builder.finalizeInOrder();
+ Sec->Pieces[I].OutputOff +=
+ ShardOffsets[getShardId(Sec->getData(I).hash())];
+ });
}
static MergeSyntheticSection *createMergeSynthetic(StringRef Name,
Modified: lld/trunk/ELF/SyntheticSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=314588&r1=314587&r2=314588&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.h (original)
+++ lld/trunk/ELF/SyntheticSections.h Sat Sep 30 04:46:26 2017
@@ -668,24 +668,26 @@ public:
class MergeSyntheticSection : public SyntheticSection {
public:
void addSection(MergeInputSection *MS);
- size_t getSize() const override;
- void writeTo(uint8_t *Buf) override;
protected:
MergeSyntheticSection(StringRef Name, uint32_t Type, uint64_t Flags,
- uint32_t Alignment);
+ uint32_t Alignment)
+ : SyntheticSection(Flags, Type, Alignment, Name) {}
std::vector<MergeInputSection *> Sections;
- llvm::StringTableBuilder Builder;
};
class MergeTailSection final : public MergeSyntheticSection {
public:
MergeTailSection(StringRef Name, uint32_t Type, uint64_t Flags,
- uint32_t Alignment)
- : MergeSyntheticSection(Name, Type, Flags, Alignment) {}
+ uint32_t Alignment);
+ size_t getSize() const override;
+ void writeTo(uint8_t *Buf) override;
void finalizeContents() override;
+
+private:
+ llvm::StringTableBuilder Builder;
};
class MergeNoTailSection final : public MergeSyntheticSection {
@@ -694,7 +696,27 @@ public:
uint32_t Alignment)
: MergeSyntheticSection(Name, Type, Flags, Alignment) {}
+ size_t getSize() const override { return Size; }
+ void writeTo(uint8_t *Buf) override;
void finalizeContents() override;
+
+private:
+ // We use the most significant bits of a hash as a shard ID.
+ // The reason why we don't want to use the least significant bits is
+ // because DenseMap also uses lower bits to determine a bucket ID.
+ // If we use lower bits, it significantly increases the probability of
+ // hash collisions.
+ size_t getShardId(uint32_t Hash) {
+ return Hash >> (32 - llvm::countTrailingZeros(NumShards));
+ }
+
+ // Section size
+ size_t Size;
+
+ // String table contents
+ constexpr static size_t NumShards = 32;
+ std::vector<llvm::StringTableBuilder> Shards;
+ size_t ShardOffsets[NumShards];
};
// .MIPS.abiflags section.
Modified: lld/trunk/test/ELF/comment-gc.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/comment-gc.s?rev=314588&r1=314587&r2=314588&view=diff
==============================================================================
--- lld/trunk/test/ELF/comment-gc.s (original)
+++ lld/trunk/test/ELF/comment-gc.s Sat Sep 30 04:46:26 2017
@@ -5,8 +5,7 @@
# RUN: llvm-objdump -s %t1 | FileCheck %s
# CHECK: Contents of section .comment:
-# CHECK-NEXT: 0000 00666f6f 00626172 004c4c44 20312e30 .foo.bar.LLD 1.0
-# CHECK-NEXT: 0010 00 .
+# CHECK-NEXT: foo.LLD 1.0..bar
.ident "foo"
Modified: lld/trunk/test/ELF/compressed-debug-input.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/compressed-debug-input.s?rev=314588&r1=314587&r2=314588&view=diff
==============================================================================
--- lld/trunk/test/ELF/compressed-debug-input.s (original)
+++ lld/trunk/test/ELF/compressed-debug-input.s Sat Sep 30 04:46:26 2017
@@ -61,11 +61,11 @@
# DATA-NEXT: AddressAlignment: 1
# DATA-NEXT: EntrySize: 0
# DATA-NEXT: SectionData (
-# DATA-NEXT: 0000: 73686F72 7420756E 7369676E 65642069 |short unsigned i|
-# DATA-NEXT: 0010: 6E740075 6E736967 6E656420 696E7400 |nt.unsigned int.|
+# DATA-NEXT: 0000: 756E7369 676E6564 20696E74 00636861 |unsigned int.cha|
+# DATA-NEXT: 0010: 7200756E 7369676E 65642063 68617200 |r.unsigned char.|
# DATA-NEXT: 0020: 6C6F6E67 20756E73 69676E65 6420696E |long unsigned in|
-# DATA-NEXT: 0030: 74006368 61720075 6E736967 6E656420 |t.char.unsigned |
-# DATA-NEXT: 0040: 63686172 00 |char.|
+# DATA-NEXT: 0030: 74007368 6F727420 756E7369 676E6564 |t.short unsigned|
+# DATA-NEXT: 0040: 20696E74 00 | int.|
# DATA-NEXT: )
# DATA-NEXT: }
Modified: lld/trunk/test/ELF/merge-string.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/merge-string.s?rev=314588&r1=314587&r2=314588&view=diff
==============================================================================
--- lld/trunk/test/ELF/merge-string.s (original)
+++ lld/trunk/test/ELF/merge-string.s Sat Sep 30 04:46:26 2017
@@ -54,7 +54,7 @@ zed:
// NOTAIL-NEXT: AddressAlignment: 1
// NOTAIL-NEXT: EntrySize: 0
// NOTAIL-NEXT: SectionData (
-// NOTAIL-NEXT: 0000: 61626300 626300 |abc.bc.|
+// NOTAIL-NEXT: 0000: 62630061 626300 |bc.abc.|
// NOTAIL-NEXT: )
// NOMERGE: Name: .rodata1
Modified: lld/trunk/test/ELF/string-gc.s
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/string-gc.s?rev=314588&r1=314587&r2=314588&view=diff
==============================================================================
--- lld/trunk/test/ELF/string-gc.s (original)
+++ lld/trunk/test/ELF/string-gc.s Sat Sep 30 04:46:26 2017
@@ -14,7 +14,7 @@
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: s3
-// CHECK-NEXT: Value: 0x200125
+// CHECK-NEXT: Value: 0x200120
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: Object (0x1)
@@ -23,7 +23,7 @@
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: s1
-// CHECK-NEXT: Value: 0x200120
+// CHECK-NEXT: Value: 0x200125
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: Object (0x1)
More information about the llvm-commits
mailing list