<div dir="ltr">Do I understand the commit message correctly that this regresses single-threaded performance? Is single-threaded the default lld behavior?</div><br><div class="gmail_quote"><div dir="ltr">On Wed, Jul 11, 2018 at 4:42 AM Rui Ueyama via llvm-commits &lt;<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: ruiu<br>
Date: Wed Jul 11 04:37:10 2018<br>
New Revision: 336790<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=336790&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=336790&view=rev</a><br>
Log:<br>
Parallelize GdbIndexSection's symbol table creation.<br>
<br>
Since .gdb_index sections contain all known symbols, they can be very large.<br>
One of my executables has a .gdb_index section of 1350 GiB. Uniquifying<br>
symbols by name takes 3.77 seconds on my machine. This patch parallelizes it.<br>
<br>
  Time to call createSymbols() with 8.4 million unique symbols:<br>
<br>
  Without this patch: 3773 ms<br>
  Parallelism = 1:    4374 ms<br>
  Parallelism = 2:    2628 ms<br>
  Parallelism = 16:    837 ms<br>
<br>
As you can see above, this algorithm is a bit less efficient<br>
than the non-parallelized version, but even on a dual-core machine it is<br>
faster than the original, so I think it is overall a win.<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D49164" rel="noreferrer" target="_blank">https://reviews.llvm.org/D49164</a><br>
<br>
Modified:<br>
    lld/trunk/ELF/SyntheticSections.cpp<br>
    lld/trunk/test/ELF/gdb-index.s<br>
<br>
Modified: lld/trunk/ELF/SyntheticSections.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=336790&r1=336789&r2=336790&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=336790&r1=336789&r2=336790&view=diff</a><br>
==============================================================================<br>
--- lld/trunk/ELF/SyntheticSections.cpp (original)<br>
+++ lld/trunk/ELF/SyntheticSections.cpp Wed Jul 11 04:37:10 2018<br>
@@ -2366,23 +2366,51 @@ createSymbols(ArrayRef<std::vector<GdbIn<br>
   typedef GdbIndexSection::GdbSymbol GdbSymbol;<br>
   typedef GdbIndexSection::NameTypeEntry NameTypeEntry;<br>
<br>
-  // A map to uniquify symbols by name.<br>
-  DenseMap<CachedHashStringRef, size_t> Map;<br>
+  // The number of symbols we will handle in this function is of the order<br>
+  // of millions for very large executables, so we use multi-threading to<br>
+  // speed it up.<br>
+  size_t NumShards = 32;<br>
+  size_t Concurrency = 1;<br>
+  if (ThreadsEnabled)<br>
+    Concurrency =<br>
+        std::min<size_t>(PowerOf2Floor(hardware_concurrency()), NumShards);<br>
+<br>
+  // A sharded map to uniquify symbols by name.<br>
+  std::vector<DenseMap<CachedHashStringRef, size_t>> Map(NumShards);<br>
+  size_t Shift = 32 - countTrailingZeros(NumShards);<br>
<br>
   // Instantiate GdbSymbols while uniqufying them by name.<br>
-  std::vector<GdbSymbol> Ret;<br>
-  for (ArrayRef<NameTypeEntry> Entries : NameTypes) {<br>
-    for (const NameTypeEntry &Ent : Entries) {<br>
-      size_t &Idx = Map[Ent.Name];<br>
-      if (Idx) {<br>
-        Ret[Idx - 1].CuVector.push_back(Ent.Type);<br>
-        continue;<br>
-      }<br>
+  std::vector<std::vector<GdbSymbol>> Symbols(NumShards);<br>
+  parallelForEachN(0, Concurrency, [&](size_t ThreadId) {<br>
+    for (ArrayRef<NameTypeEntry> Entries : NameTypes) {<br>
+      for (const NameTypeEntry &Ent : Entries) {<br>
+        size_t ShardId = Ent.Name.hash() >> Shift;<br>
+        if ((ShardId & (Concurrency - 1)) != ThreadId)<br>
+          continue;<br>
+<br>
+        size_t &Idx = Map[ShardId][Ent.Name];<br>
+        if (Idx) {<br>
+          Symbols[ShardId][Idx - 1].CuVector.push_back(Ent.Type);<br>
+          continue;<br>
+        }<br>
<br>
-      Idx = Ret.size() + 1;<br>
-      Ret.push_back({Ent.Name, {Ent.Type}, 0, 0});<br>
+        Idx = Symbols[ShardId].size() + 1;<br>
+        Symbols[ShardId].push_back({Ent.Name, {Ent.Type}, 0, 0});<br>
+      }<br>
     }<br>
-  }<br>
+  });<br>
+<br>
+  size_t NumSymbols = 0;<br>
+  for (ArrayRef<GdbSymbol> V : Symbols)<br>
+    NumSymbols += V.size();<br>
+<br>
+  // The return type is a flattened vector, so we'll copy each vector<br>
+  // contents to Ret.<br>
+  std::vector<GdbSymbol> Ret;<br>
+  Ret.reserve(NumSymbols);<br>
+  for (std::vector<GdbSymbol> &Vec : Symbols)<br>
+    for (GdbSymbol &Sym : Vec)<br>
+      Ret.push_back(std::move(Sym));<br>
<br>
   // CU vectors and symbol names are adjacent in the output file.<br>
   // We can compute their offsets in the output file now.<br>
<br>
Modified: lld/trunk/test/ELF/gdb-index.s<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gdb-index.s?rev=336790&r1=336789&r2=336790&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/lld/trunk/test/ELF/gdb-index.s?rev=336790&r1=336789&r2=336790&view=diff</a><br>
==============================================================================<br>
--- lld/trunk/test/ELF/gdb-index.s (original)<br>
+++ lld/trunk/test/ELF/gdb-index.s Wed Jul 11 04:37:10 2018<br>
@@ -34,16 +34,16 @@<br>
 # DWARF-NEXT:    Low/High address = [0x201000, 0x201001) (Size: 0x1), CU id = 0<br>
 # DWARF-NEXT:    Low/High address = [0x201004, 0x201006) (Size: 0x2), CU id = 1<br>
 # DWARF:       Symbol table offset = 0x60, size = 1024, filled slots:<br>
-# DWARF-NEXT:    512: Name offset = 0x2b, CU vector offset = 0x14<br>
-# DWARF-NEXT:      String name: aaaaaaaaaaaaaaaa, CU vector index: 2<br>
-# DWARF-NEXT:    754: Name offset = 0x27, CU vector offset = 0x8<br>
-# DWARF-NEXT:      String name: int, CU vector index: 1<br>
-# DWARF-NEXT:    822: Name offset = 0x1c, CU vector offset = 0x0<br>
-# DWARF-NEXT:      String name: entrypoint, CU vector index: 0<br>
+# DWARF-NEXT:    512: Name offset = 0x1c, CU vector offset = 0x0<br>
+# DWARF-NEXT:      String name: aaaaaaaaaaaaaaaa, CU vector index: 0<br>
+# DWARF-NEXT:    754: Name offset = 0x38, CU vector offset = 0x10<br>
+# DWARF-NEXT:      String name: int, CU vector index: 2<br>
+# DWARF-NEXT:    822: Name offset = 0x2d, CU vector offset = 0x8<br>
+# DWARF-NEXT:      String name: entrypoint, CU vector index: 1<br>
 # DWARF:       Constant pool offset = 0x2060, has 3 CU vectors:<br>
-# DWARF-NEXT:    0(0x0): 0x30000000<br>
-# DWARF-NEXT:    1(0x8): 0x90000000 0x90000001<br>
-# DWARF-NEXT:    2(0x14): 0x30000001<br>
+# DWARF-NEXT:    0(0x0): 0x30000001<br>
+# DWARF-NEXT:    1(0x8): 0x30000000<br>
+# DWARF-NEXT:    2(0x10): 0x90000000 0x90000001<br>
<br>
 # SECTION-NOT: debug_gnu_pubnames<br>
<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div>