[PATCH] D49164: Parallelize GdbIndexSection's symbol table creation.

Rui Ueyama via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 11 03:59:33 PDT 2018


ruiu updated this revision to Diff 154964.
ruiu added a comment.

- rebased


https://reviews.llvm.org/D49164

Files:
  lld/ELF/SyntheticSections.cpp
  lld/test/ELF/gdb-index.s


Index: lld/test/ELF/gdb-index.s
===================================================================
--- lld/test/ELF/gdb-index.s
+++ lld/test/ELF/gdb-index.s
@@ -34,16 +34,16 @@
 # DWARF-NEXT:    Low/High address = [0x201000, 0x201001) (Size: 0x1), CU id = 0
 # DWARF-NEXT:    Low/High address = [0x201004, 0x201006) (Size: 0x2), CU id = 1
 # DWARF:       Symbol table offset = 0x60, size = 1024, filled slots:
-# DWARF-NEXT:    512: Name offset = 0x2b, CU vector offset = 0x14
-# DWARF-NEXT:      String name: aaaaaaaaaaaaaaaa, CU vector index: 2
-# DWARF-NEXT:    754: Name offset = 0x27, CU vector offset = 0x8
-# DWARF-NEXT:      String name: int, CU vector index: 1
-# DWARF-NEXT:    822: Name offset = 0x1c, CU vector offset = 0x0
-# DWARF-NEXT:      String name: entrypoint, CU vector index: 0
+# DWARF-NEXT:    512: Name offset = 0x1c, CU vector offset = 0x0
+# DWARF-NEXT:      String name: aaaaaaaaaaaaaaaa, CU vector index: 0
+# DWARF-NEXT:    754: Name offset = 0x38, CU vector offset = 0x10
+# DWARF-NEXT:      String name: int, CU vector index: 2
+# DWARF-NEXT:    822: Name offset = 0x2d, CU vector offset = 0x8
+# DWARF-NEXT:      String name: entrypoint, CU vector index: 1
 # DWARF:       Constant pool offset = 0x2060, has 3 CU vectors:
-# DWARF-NEXT:    0(0x0): 0x30000000
-# DWARF-NEXT:    1(0x8): 0x90000000 0x90000001
-# DWARF-NEXT:    2(0x14): 0x30000001
+# DWARF-NEXT:    0(0x0): 0x30000001
+# DWARF-NEXT:    1(0x8): 0x30000000
+# DWARF-NEXT:    2(0x10): 0x90000000 0x90000001
 
 # SECTION-NOT: debug_gnu_pubnames
 
Index: lld/ELF/SyntheticSections.cpp
===================================================================
--- lld/ELF/SyntheticSections.cpp
+++ lld/ELF/SyntheticSections.cpp
@@ -2366,23 +2366,51 @@
   typedef GdbIndexSection::GdbSymbol GdbSymbol;
   typedef GdbIndexSection::NameTypeEntry NameTypeEntry;
 
-  // A map to uniquify symbols by name.
-  DenseMap<CachedHashStringRef, size_t> Map;
+  // The number of symbols we will handle in this function is of the order
+  // of millions for very large executables, so we use multi-threading to
+  // speed it up.
+  size_t NumShards = 16;
+  size_t Concurrency = 1;
+  if (ThreadsEnabled)
+    Concurrency =
+        std::min<size_t>(PowerOf2Floor(hardware_concurrency()), NumShards);
+
+  // A sharded map to uniquify symbols by name.
+  std::vector<DenseMap<CachedHashStringRef, size_t>> Map(NumShards);
+  size_t Shift = 32 - countTrailingZeros(NumShards);
 
   // Instantiate GdbSymbols while uniqufying them by name.
-  std::vector<GdbSymbol> Ret;
-  for (ArrayRef<NameTypeEntry> Entries : NameTypes) {
-    for (const NameTypeEntry &Ent : Entries) {
-      size_t &Idx = Map[Ent.Name];
-      if (Idx) {
-        Ret[Idx - 1].CuVector.push_back(Ent.Type);
-        continue;
-      }
+  std::vector<std::vector<GdbSymbol>> Symbols(NumShards);
+  parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
+    for (ArrayRef<NameTypeEntry> Entries : NameTypes) {
+      for (const NameTypeEntry &Ent : Entries) {
+        size_t ShardId = Ent.Name.hash() >> Shift;
+        if ((ShardId & (Concurrency - 1)) != ThreadId)
+          continue;
 
-      Idx = Ret.size() + 1;
-      Ret.push_back({Ent.Name, {Ent.Type}, 0, 0});
+        size_t &Idx = Map[ShardId][Ent.Name];
+        if (Idx) {
+          Symbols[ShardId][Idx - 1].CuVector.push_back(Ent.Type);
+          continue;
+        }
+
+        Idx = Symbols[ShardId].size() + 1;
+        Symbols[ShardId].push_back({Ent.Name, {Ent.Type}, 0, 0});
+      }
     }
-  }
+  });
+
+  size_t NumSymbols = 0;
+  for (ArrayRef<GdbSymbol> V : Symbols)
+    NumSymbols += V.size();
+
+  // The return type is a flattened vector, so we'll copy each vector
+  // contents to Ret.
+  std::vector<GdbSymbol> Ret;
+  Ret.reserve(NumSymbols);
+  for (std::vector<GdbSymbol> &Vec : Symbols)
+    for (GdbSymbol &Sym : Vec)
+      Ret.push_back(std::move(Sym));
 
   // CU vectors and symbol names are adjacent in the output file.
   // We can compute their offsets in the output file now.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D49164.154964.patch
Type: text/x-patch
Size: 4022 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180711/6940fe01/attachment.bin>


More information about the llvm-commits mailing list