[lld] r239332 - COFF: Read symbol names lazily.

Rui Ueyama ruiu at google.com
Mon Jun 8 12:44:00 PDT 2015


Author: ruiu
Date: Mon Jun  8 14:43:59 2015
New Revision: 239332

URL: http://llvm.org/viewvc/llvm-project?rev=239332&view=rev
Log:
COFF: Read symbol names lazily.

This change seems to make the linker about 10% faster.
Reading a symbol name is not cheap because it requires calling strlen()
on the string table. We were wasting time reading non-external
symbol names that would never be used by the linker.

Modified:
    lld/trunk/COFF/InputFiles.cpp
    lld/trunk/COFF/InputFiles.h
    lld/trunk/COFF/Symbols.cpp
    lld/trunk/COFF/Symbols.h

Modified: lld/trunk/COFF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/InputFiles.cpp?rev=239332&r1=239331&r2=239332&view=diff
==============================================================================
--- lld/trunk/COFF/InputFiles.cpp (original)
+++ lld/trunk/COFF/InputFiles.cpp Mon Jun  8 14:43:59 2015
@@ -158,23 +158,12 @@ std::error_code ObjectFile::initializeSy
     }
     COFFSymbolRef Sym = SymOrErr.get();
 
-    // Get a symbol name.
-    StringRef SymbolName;
-    if (auto EC = COFFObj->getSymbolName(Sym, SymbolName)) {
-      llvm::errs() << "broken object file: " << getName() << ": "
-                   << EC.message() << "\n";
-      return make_error_code(LLDError::BrokenFile);
-    }
-    // Skip special symbols.
-    if (SymbolName == "@comp.id" || SymbolName == "@feat.00")
-      continue;
-
     const void *AuxP = nullptr;
     if (Sym.getNumberOfAuxSymbols())
       AuxP = COFFObj->getSymbol(I + 1)->getRawPtr();
     bool IsFirst = (LastSectionNumber != Sym.getSectionNumber());
 
-    SymbolBody *Body = createSymbolBody(SymbolName, Sym, AuxP, IsFirst);
+    SymbolBody *Body = createSymbolBody(Sym, AuxP, IsFirst);
     if (Body) {
       SymbolBodies.push_back(Body);
       SparseSymbolBodies[I] = Body;
@@ -185,19 +174,28 @@ std::error_code ObjectFile::initializeSy
   return std::error_code();
 }
 
-SymbolBody *ObjectFile::createSymbolBody(StringRef Name, COFFSymbolRef Sym,
-                                         const void *AuxP, bool IsFirst) {
-  if (Sym.isUndefined())
+SymbolBody *ObjectFile::createSymbolBody(COFFSymbolRef Sym, const void *AuxP,
+                                         bool IsFirst) {
+  StringRef Name;
+  if (Sym.isUndefined()) {
+    COFFObj->getSymbolName(Sym, Name);
     return new (Alloc) Undefined(Name);
+  }
   if (Sym.isCommon()) {
     Chunk *C = new (Alloc) CommonChunk(Sym);
     Chunks.push_back(C);
-    return new (Alloc) DefinedRegular(Name, Sym, C);
+    return new (Alloc) DefinedRegular(COFFObj.get(), Sym, C);
   }
-  if (Sym.isAbsolute())
+  if (Sym.isAbsolute()) {
+    COFFObj->getSymbolName(Sym, Name);
+    // Skip special symbols.
+    if (Name == "@comp.id" || Name == "@feat.00")
+      return nullptr;
     return new (Alloc) DefinedAbsolute(Name, Sym.getValue());
+  }
   // TODO: Handle IMAGE_WEAK_EXTERN_SEARCH_ALIAS
   if (Sym.isWeakExternal()) {
+    COFFObj->getSymbolName(Sym, Name);
     auto *Aux = (const coff_aux_weak_external *)AuxP;
     return new (Alloc) Undefined(Name, &SparseSymbolBodies[Aux->TagIndex]);
   }
@@ -214,7 +212,7 @@ SymbolBody *ObjectFile::createSymbolBody
     }
   }
   if (Chunk *C = SparseChunks[Sym.getSectionNumber()])
-    return new (Alloc) DefinedRegular(Name, Sym, C);
+    return new (Alloc) DefinedRegular(COFFObj.get(), Sym, C);
   return nullptr;
 }
 

Modified: lld/trunk/COFF/InputFiles.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/InputFiles.h?rev=239332&r1=239331&r2=239332&view=diff
==============================================================================
--- lld/trunk/COFF/InputFiles.h (original)
+++ lld/trunk/COFF/InputFiles.h Mon Jun  8 14:43:59 2015
@@ -111,8 +111,8 @@ private:
   std::error_code initializeChunks();
   std::error_code initializeSymbols();
 
-  SymbolBody *createSymbolBody(StringRef Name, COFFSymbolRef Sym,
-                               const void *Aux, bool IsFirst);
+  SymbolBody *createSymbolBody(COFFSymbolRef Sym, const void *Aux,
+                               bool IsFirst);
 
   std::unique_ptr<COFFObjectFile> COFFObj;
   StringRef Directives;

Modified: lld/trunk/COFF/Symbols.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Symbols.cpp?rev=239332&r1=239331&r2=239332&view=diff
==============================================================================
--- lld/trunk/COFF/Symbols.cpp (original)
+++ lld/trunk/COFF/Symbols.cpp Mon Jun  8 14:43:59 2015
@@ -69,6 +69,19 @@ int Undefined::compare(SymbolBody *Other
   return 1;
 }
 
+StringRef DefinedRegular::getName() {
+  // DefinedSymbol's name is read lazily for a performance reason.
+  // Non-external symbol names are never used by the linker.
+  // Their internal references are resolved not by name but by symbol index.
+  // And because they are not external, no one can refer them by name.
+  // Object files contain lots of non-external symbols, and creating
+  // StringRefs for them (which involves lots of strlen() on the string table)
+  // is a waste of time.
+  if (Name.empty())
+    COFFFile->getSymbolName(Sym, Name);
+  return Name;
+}
+
 ErrorOr<std::unique_ptr<InputFile>> Lazy::getMember() {
   auto MBRefOrErr = File->getMember(&Sym);
   if (auto EC = MBRefOrErr.getError())

Modified: lld/trunk/COFF/Symbols.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Symbols.h?rev=239332&r1=239331&r2=239332&view=diff
==============================================================================
--- lld/trunk/COFF/Symbols.h (original)
+++ lld/trunk/COFF/Symbols.h Mon Jun  8 14:43:59 2015
@@ -23,6 +23,7 @@ namespace lld {
 namespace coff {
 
 using llvm::object::Archive;
+using llvm::object::COFFObjectFile;
 using llvm::object::COFFSymbolRef;
 using llvm::object::coff_import_header;
 
@@ -61,7 +62,7 @@ public:
   virtual bool isExternal() { return true; }
 
   // Returns the symbol name.
-  StringRef getName() { return Name; }
+  virtual StringRef getName() = 0;
 
   // A SymbolBody has a backreference to a Symbol. Originally they are
   // doubly-linked. A backreference will never change. But the pointer
@@ -78,11 +79,10 @@ public:
   virtual int compare(SymbolBody *Other) = 0;
 
 protected:
-  SymbolBody(Kind K, StringRef N) : SymbolKind(K), Name(N) {}
+  SymbolBody(Kind K) : SymbolKind(K) {}
 
 private:
   const Kind SymbolKind;
-  StringRef Name;
   Symbol *Backref = nullptr;
 };
 
@@ -90,7 +90,7 @@ private:
 // etc.
 class Defined : public SymbolBody {
 public:
-  Defined(Kind K, StringRef Name) : SymbolBody(K, Name) {}
+  Defined(Kind K) : SymbolBody(K) {}
 
   static bool classof(const SymbolBody *S) {
     Kind K = S->kind();
@@ -115,13 +115,14 @@ public:
 // Regular defined symbols read from object file symbol tables.
 class DefinedRegular : public Defined {
 public:
-  DefinedRegular(StringRef Name, COFFSymbolRef S, Chunk *C)
-      : Defined(DefinedRegularKind, Name), Sym(S), Data(C) {}
+  DefinedRegular(COFFObjectFile *F, COFFSymbolRef S, Chunk *C)
+      : Defined(DefinedRegularKind), COFFFile(F), Sym(S), Data(C) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == DefinedRegularKind;
   }
 
+  StringRef getName() override;
   uint64_t getRVA() override { return Data->getRVA() + Sym.getValue(); }
   bool isExternal() override { return Sym.isExternal(); }
   void markLive() override { Data->markLive(); }
@@ -133,6 +134,8 @@ public:
   uint32_t getCommonSize() const { return Sym.getValue(); }
 
 private:
+  StringRef Name;
+  COFFObjectFile *COFFFile;
   COFFSymbolRef Sym;
   Chunk *Data;
 };
@@ -140,17 +143,19 @@ private:
 // Absolute symbols.
 class DefinedAbsolute : public Defined {
 public:
-  DefinedAbsolute(StringRef Name, uint64_t VA)
-      : Defined(DefinedAbsoluteKind, Name), RVA(VA - Config->ImageBase) {}
+  DefinedAbsolute(StringRef N, uint64_t VA)
+      : Defined(DefinedAbsoluteKind), Name(N), RVA(VA - Config->ImageBase) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == DefinedAbsoluteKind;
   }
 
+  StringRef getName() override { return Name; }
   uint64_t getRVA() override { return RVA; }
   uint64_t getFileOff() override { llvm_unreachable("internal error"); }
 
 private:
+  StringRef Name;
   uint64_t RVA;
 };
 
@@ -162,9 +167,10 @@ private:
 class Lazy : public SymbolBody {
 public:
   Lazy(ArchiveFile *F, const Archive::Symbol S)
-      : SymbolBody(LazyKind, S.getName()), File(F), Sym(S) {}
+      : SymbolBody(LazyKind), Name(S.getName()), File(F), Sym(S) {}
 
   static bool classof(const SymbolBody *S) { return S->kind() == LazyKind; }
+  StringRef getName() override { return Name; }
 
   // Returns an object file for this symbol, or a nullptr if the file
   // was already returned.
@@ -173,6 +179,7 @@ public:
   int compare(SymbolBody *Other) override;
 
 private:
+  StringRef Name;
   ArchiveFile *File;
   const Archive::Symbol Sym;
 };
@@ -180,12 +187,13 @@ private:
 // Undefined symbols.
 class Undefined : public SymbolBody {
 public:
-  explicit Undefined(StringRef Name, SymbolBody **S = nullptr)
-      : SymbolBody(UndefinedKind, Name), Alias(S) {}
+  explicit Undefined(StringRef N, SymbolBody **S = nullptr)
+      : SymbolBody(UndefinedKind), Name(N), Alias(S) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == UndefinedKind;
   }
+  StringRef getName() override { return Name; }
 
   // An undefined symbol can have a fallback symbol which gives an
   // undefined symbol a second chance if it would remain undefined.
@@ -196,6 +204,7 @@ public:
   int compare(SymbolBody *Other) override;
 
 private:
+  StringRef Name;
   SymbolBody **Alias;
 };
 
@@ -207,15 +216,16 @@ private:
 // table in an output. The former has "__imp_" prefix.
 class DefinedImportData : public Defined {
 public:
-  DefinedImportData(StringRef D, StringRef Name, StringRef E,
+  DefinedImportData(StringRef D, StringRef N, StringRef E,
                     const coff_import_header *H)
-      : Defined(DefinedImportDataKind, Name), DLLName(D),
-        ExternalName(E), Hdr(H) {}
+      : Defined(DefinedImportDataKind), Name(N), DLLName(D), ExternalName(E),
+        Hdr(H) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == DefinedImportDataKind;
   }
 
+  StringRef getName() override { return Name; }
   uint64_t getRVA() override { return Location->getRVA(); }
   uint64_t getFileOff() override { return Location->getFileOff(); }
   StringRef getDLLName() { return DLLName; }
@@ -224,6 +234,7 @@ public:
   uint16_t getOrdinal() { return Hdr->OrdinalHint; }
 
 private:
+  StringRef Name;
   StringRef DLLName;
   StringRef ExternalName;
   const coff_import_header *Hdr;
@@ -237,31 +248,37 @@ private:
 // a regular name. A function pointer is given as a DefinedImportData.
 class DefinedImportThunk : public Defined {
 public:
-  DefinedImportThunk(StringRef Name, DefinedImportData *S)
-      : Defined(DefinedImportThunkKind, Name), Data(S) {}
+  DefinedImportThunk(StringRef N, DefinedImportData *S)
+      : Defined(DefinedImportThunkKind), Name(N), Data(S) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == DefinedImportThunkKind;
   }
 
+  StringRef getName() override { return Name; }
   uint64_t getRVA() override { return Data.getRVA(); }
   uint64_t getFileOff() override { return Data.getFileOff(); }
   Chunk *getChunk() { return &Data; }
 
 private:
+  StringRef Name;
   ImportThunkChunk Data;
 };
 
 class DefinedBitcode : public Defined {
 public:
-  DefinedBitcode(StringRef Name) : Defined(DefinedBitcodeKind, Name) {}
+  DefinedBitcode(StringRef N) : Defined(DefinedBitcodeKind), Name(N) {}
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == DefinedBitcodeKind;
   }
 
+  StringRef getName() override { return Name; }
   uint64_t getRVA() override { llvm_unreachable("bitcode reached writer"); }
   uint64_t getFileOff() override { llvm_unreachable("bitcode reached writer"); }
+
+private:
+  StringRef Name;
 };
 
 } // namespace coff





More information about the llvm-commits mailing list