[lld] r288172 - Introduce StringRefZ class to represent null-terminated strings.

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 29 10:05:04 PST 2016


Author: ruiu
Date: Tue Nov 29 12:05:04 2016
New Revision: 288172

URL: http://llvm.org/viewvc/llvm-project?rev=288172&view=rev
Log:
Introduce StringRefZ class to represent null-terminated strings.

StringRefZ is a class to represent a null-terminated string. String
length is computed lazily, so it's more efficient than StringRef to
represent strings in string table.

The motivation of defining this new class is to merge functions
that only differ in string types; we have many constructors that takes
`const char *` or `StringRef`. With StringRefZ, we can merge them.

Differential Revision: https://reviews.llvm.org/D27037

Modified:
    lld/trunk/ELF/InputFiles.cpp
    lld/trunk/ELF/LTO.cpp
    lld/trunk/ELF/Strings.cpp
    lld/trunk/ELF/Strings.h
    lld/trunk/ELF/SymbolTable.cpp
    lld/trunk/ELF/SymbolTable.h
    lld/trunk/ELF/Symbols.cpp
    lld/trunk/ELF/Symbols.h

Modified: lld/trunk/ELF/InputFiles.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/InputFiles.cpp?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/InputFiles.cpp (original)
+++ lld/trunk/ELF/InputFiles.cpp Tue Nov 29 12:05:04 2016
@@ -438,6 +438,11 @@ SymbolBody *elf::ObjectFile<ELFT>::creat
   int Binding = Sym->getBinding();
   InputSectionBase<ELFT> *Sec = getSection(*Sym);
 
+  uint8_t StOther = Sym->st_other;
+  uint8_t Type = Sym->getType();
+  uintX_t Value = Sym->st_value;
+  uintX_t Size = Sym->st_size;
+
   if (Binding == STB_LOCAL) {
     if (Sym->getType() == STT_FILE)
       SourceFile = check(Sym->getName(this->StringTable));
@@ -447,20 +452,19 @@ SymbolBody *elf::ObjectFile<ELFT>::creat
 
     const char *Name = this->StringTable.data() + Sym->st_name;
     if (Sym->st_shndx == SHN_UNDEF)
-      return new (BAlloc) Undefined(Name, Sym->st_other, Sym->getType(), this);
-    return new (BAlloc) DefinedRegular<ELFT>(Name, *Sym, Sec);
+      return new (BAlloc)
+          Undefined(Name, /*IsLocal=*/true, StOther, Type, this);
+
+    return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther,
+                                             Type, Value, Size, Sec, this);
   }
 
   StringRef Name = check(Sym->getName(this->StringTable));
-  uint8_t StOther = Sym->st_other;
-  uint8_t Type = Sym->getType();
-  uintX_t Value = Sym->st_value;
-  uintX_t Size = Sym->st_size;
 
   switch (Sym->st_shndx) {
   case SHN_UNDEF:
     return elf::Symtab<ELFT>::X
-        ->addUndefined(Name, Binding, StOther, Type,
+        ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
                        /*CanOmitFromDynSym=*/false, this)
         ->body();
   case SHN_COMMON:
@@ -480,7 +484,7 @@ SymbolBody *elf::ObjectFile<ELFT>::creat
   case STB_GNU_UNIQUE:
     if (Sec == &InputSection<ELFT>::Discarded)
       return elf::Symtab<ELFT>::X
-          ->addUndefined(Name, Binding, StOther, Type,
+          ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
                          /*CanOmitFromDynSym=*/false, this)
           ->body();
     return elf::Symtab<ELFT>::X
@@ -723,12 +727,14 @@ static Symbol *createBitcodeSymbol(const
 
   int C = check(ObjSym.getComdatIndex());
   if (C != -1 && !KeptComdats[C])
-    return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
-                                         CanOmitFromDynSym, F);
+    return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
+                                         Visibility, Type, CanOmitFromDynSym,
+                                         F);
 
   if (Flags & BasicSymbolRef::SF_Undefined)
-    return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
-                                         CanOmitFromDynSym, F);
+    return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
+                                         Visibility, Type, CanOmitFromDynSym,
+                                         F);
 
   if (Flags & BasicSymbolRef::SF_Common)
     return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(),

Modified: lld/trunk/ELF/LTO.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/LTO.cpp?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/LTO.cpp (original)
+++ lld/trunk/ELF/LTO.cpp Tue Nov 29 12:05:04 2016
@@ -97,8 +97,8 @@ BitcodeCompiler::BitcodeCompiler() : LTO
 BitcodeCompiler::~BitcodeCompiler() = default;
 
 static void undefine(Symbol *S) {
-  replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type,
-                         nullptr);
+  replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false,
+                         STV_DEFAULT, S->body()->Type, nullptr);
 }
 
 void BitcodeCompiler::add(BitcodeFile &F) {

Modified: lld/trunk/ELF/Strings.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.cpp?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/Strings.cpp (original)
+++ lld/trunk/ELF/Strings.cpp Tue Nov 29 12:05:04 2016
@@ -16,6 +16,7 @@
 #include "llvm/Config/config.h"
 #include "llvm/Demangle/Demangle.h"
 #include <algorithm>
+#include <cstring>
 
 using namespace llvm;
 using namespace lld;

Modified: lld/trunk/ELF/Strings.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Strings.h?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/Strings.h (original)
+++ lld/trunk/ELF/Strings.h Tue Nov 29 12:05:04 2016
@@ -26,6 +26,36 @@ std::vector<uint8_t> parseHex(StringRef
 bool isValidCIdentifier(StringRef S);
 StringRef unquote(StringRef S);
 
+// This is a lazy version of StringRef. String size is computed lazily
+// when it is needed. It is more efficient than StringRef to instantiate
+// if you have a string whose size is unknown.
+//
+// ELF string tables contain a lot of null-terminated strings.
+// Most of them are not necessary for the linker because they are names
+// of local symbols and the linker doesn't use local symbol names for
+// name resolution. So, we use this class to represents strings read
+// from string tables.
+class StringRefZ {
+public:
+  StringRefZ() : Start(nullptr), Size(0) {}
+  StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {}
+
+  /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {}
+
+  /*implicit*/ StringRefZ(llvm::StringRef S)
+      : Start(S.data()), Size(S.size()) {}
+
+  operator llvm::StringRef() const {
+    if (Size == (size_t)-1)
+      Size = strlen(Start);
+    return {Start, Size};
+  }
+
+private:
+  const char *Start;
+  mutable size_t Size;
+};
+
 // This class represents a glob pattern. Supported metacharacters
 // are "*", "?", "[<chars>]" and "[^<chars>]".
 class GlobPattern {

Modified: lld/trunk/ELF/SymbolTable.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.cpp?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.cpp (original)
+++ lld/trunk/ELF/SymbolTable.cpp Tue Nov 29 12:05:04 2016
@@ -236,14 +236,15 @@ SymbolTable<ELFT>::insert(StringRef Name
 }
 
 template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
-  return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0,
+  return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
+                      /*Type*/ 0,
                       /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
 }
 
 template <class ELFT>
-Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
-                                        uint8_t StOther, uint8_t Type,
-                                        bool CanOmitFromDynSym,
+Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
+                                        uint8_t Binding, uint8_t StOther,
+                                        uint8_t Type, bool CanOmitFromDynSym,
                                         InputFile *File) {
   Symbol *S;
   bool WasInserted;
@@ -251,7 +252,7 @@ Symbol *SymbolTable<ELFT>::addUndefined(
       insert(Name, Type, StOther & 3, CanOmitFromDynSym, File);
   if (WasInserted) {
     S->Binding = Binding;
-    replaceBody<Undefined>(S, Name, StOther, Type, File);
+    replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File);
     return S;
   }
   if (Binding != STB_WEAK) {
@@ -378,8 +379,8 @@ Symbol *SymbolTable<ELFT>::addRegular(St
                                     /*CanOmitFromDynSym*/ false, File);
   int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
   if (Cmp > 0)
-    replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, Value, Size,
-                                      Section, File);
+    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
+                                      Value, Size, Section, File);
   else if (Cmp == 0)
     reportDuplicate(S->body(), Section, Value);
   return S;
@@ -432,7 +433,8 @@ Symbol *SymbolTable<ELFT>::addBitcode(St
       insert(Name, Type, StOther & 3, CanOmitFromDynSym, F);
   int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
   if (Cmp > 0)
-    replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, 0, 0, nullptr, F);
+    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
+                                      0, 0, nullptr, F);
   else if (Cmp == 0)
     reportDuplicate(S->body(), F);
   return S;

Modified: lld/trunk/ELF/SymbolTable.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SymbolTable.h?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/SymbolTable.h (original)
+++ lld/trunk/ELF/SymbolTable.h Tue Nov 29 12:05:04 2016
@@ -55,8 +55,9 @@ public:
                                    uint8_t Visibility = llvm::ELF::STV_HIDDEN);
 
   Symbol *addUndefined(StringRef Name);
-  Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
-                       uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
+  Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
+                       uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
+                       InputFile *File);
 
   Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
                      uintX_t Value, uintX_t Size, uint8_t Binding,

Modified: lld/trunk/ELF/Symbols.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Symbols.cpp?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/Symbols.cpp (original)
+++ lld/trunk/ELF/Symbols.cpp Tue Nov 29 12:05:04 2016
@@ -92,22 +92,12 @@ static typename ELFT::uint getSymVA(cons
   llvm_unreachable("invalid symbol kind");
 }
 
-SymbolBody::SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
-    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true),
+SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
+                       uint8_t Type)
+    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
       IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
       StOther(StOther), Name(Name) {}
 
-SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
-    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
-      IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
-      StOther(StOther), NameLen(Name.size()), Name(Name.data()) {}
-
-StringRef SymbolBody::getName() const {
-  if (NameLen == (uint32_t)-1)
-    NameLen = strlen(Name);
-  return StringRef(Name, NameLen);
-}
-
 // Returns true if a symbol can be replaced at load-time by a symbol
 // with the same name defined in other ELF executable or DSO.
 bool SymbolBody::isPreemptible() const {
@@ -203,7 +193,7 @@ void SymbolBody::parseSymbolVersion() {
     return;
 
   // Truncate the symbol name so that it doesn't include the version string.
-  NameLen = Pos;
+  Name = {S.data(), Pos};
 
   // '@@' in a symbol name means the default version.
   // It is usually the most recent one.
@@ -226,11 +216,9 @@ void SymbolBody::parseSymbolVersion() {
   error("symbol " + S + " has undefined version " + Verstr);
 }
 
-Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
-    : SymbolBody(K, Name, StOther, Type) {}
-
-Defined::Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
-    : SymbolBody(K, Name, StOther, Type) {}
+Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
+                 uint8_t Type)
+    : SymbolBody(K, Name, IsLocal, StOther, Type) {}
 
 template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
   if (!Section || !isFunc())
@@ -239,27 +227,23 @@ template <class ELFT> bool DefinedRegula
          (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC);
 }
 
-Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type,
-                     InputFile *File)
-    : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
-  this->File = File;
-}
-
-Undefined::Undefined(const char *Name, uint8_t StOther, uint8_t Type,
-                     InputFile *File)
-    : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
+Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther,
+                     uint8_t Type, InputFile *File)
+    : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) {
   this->File = File;
 }
 
 template <typename ELFT>
-DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
+DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef Name, uintX_t Value,
                                          const OutputSectionBase *Section)
-    : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
+    : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false,
+              STV_HIDDEN, 0 /* Type */),
       Value(Value), Section(Section) {}
 
-DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
+DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment,
                              uint8_t StOther, uint8_t Type, InputFile *File)
-    : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
+    : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther,
+              Type),
       Alignment(Alignment), Size(Size) {
   this->File = File;
 }

Modified: lld/trunk/ELF/Symbols.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/Symbols.h?rev=288172&r1=288171&r2=288172&view=diff
==============================================================================
--- lld/trunk/ELF/Symbols.h (original)
+++ lld/trunk/ELF/Symbols.h Tue Nov 29 12:05:04 2016
@@ -16,6 +16,7 @@
 #define LLD_ELF_SYMBOLS_H
 
 #include "InputSection.h"
+#include "Strings.h"
 
 #include "lld/Core/LLVM.h"
 #include "llvm/Object/Archive.h"
@@ -28,7 +29,6 @@ class ArchiveFile;
 class BitcodeFile;
 class InputFile;
 class LazyObjectFile;
-class SymbolBody;
 template <class ELFT> class ObjectFile;
 template <class ELFT> class OutputSection;
 class OutputSectionBase;
@@ -69,7 +69,7 @@ public:
   bool isShared() const { return SymbolKind == SharedKind; }
   bool isLocal() const { return IsLocal; }
   bool isPreemptible() const;
-  StringRef getName() const;
+  StringRef getName() const { return Name; }
   uint8_t getVisibility() const { return StOther & 0x3; }
   void parseSymbolVersion();
 
@@ -98,8 +98,8 @@ public:
   uint32_t GlobalDynIndex = -1;
 
 protected:
-  SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
-  SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
+  SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
+             uint8_t Type);
 
   const unsigned SymbolKind : 8;
 
@@ -136,17 +136,13 @@ public:
   bool isFile() const { return Type == llvm::ELF::STT_FILE; }
 
 protected:
-  // Local symbols are not inserted to the symbol table, so we usually
-  // don't need their names at all. We read symbol names lazily if possible.
-  mutable uint32_t NameLen = (uint32_t)-1;
-  const char *Name;
+  StringRefZ Name;
 };
 
 // The base class for any defined symbols.
 class Defined : public SymbolBody {
 public:
-  Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
-  Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
+  Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type);
   static bool classof(const SymbolBody *S) { return S->isDefined(); }
 };
 
@@ -175,25 +171,15 @@ template <class ELFT> class DefinedRegul
   typedef typename ELFT::uint uintX_t;
 
 public:
-  DefinedRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value,
-                 uintX_t Size, InputSectionBase<ELFT> *Section, InputFile *File)
-      : Defined(SymbolBody::DefinedRegularKind, Name, StOther, Type),
+  DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
+                 uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section,
+                 InputFile *File)
+      : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type),
         Value(Value), Size(Size),
         Section(Section ? Section->Repl : NullInputSection) {
     this->File = File;
   }
 
-  DefinedRegular(const char *Name, const Elf_Sym &Sym,
-                 InputSectionBase<ELFT> *Section)
-      : Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
-                Sym.getType()),
-        Value(Sym.st_value), Size(Sym.st_size),
-        Section(Section ? Section->Repl : NullInputSection) {
-    assert(isLocal());
-    if (Section)
-      this->File = Section->getFile();
-  }
-
   // Return true if the symbol is a PIC function.
   bool isMipsPIC() const;
 
@@ -248,8 +234,8 @@ public:
 
 class Undefined : public SymbolBody {
 public:
-  Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F);
-  Undefined(const char *Name, uint8_t StOther, uint8_t Type, InputFile *F);
+  Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
+            InputFile *F);
 
   static bool classof(const SymbolBody *S) {
     return S->kind() == UndefinedKind;
@@ -270,7 +256,8 @@ public:
 
   SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
                const Elf_Verdef *Verdef)
-      : Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()),
+      : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other,
+                Sym.getType()),
         Sym(Sym), Verdef(Verdef) {
     // IFuncs defined in DSOs are treated as functions by the static linker.
     if (isGnuIFunc())
@@ -309,7 +296,7 @@ public:
 
 protected:
   Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
-      : SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {}
+      : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {}
 };
 
 // LazyArchive symbols represents symbols in archive files.




More information about the llvm-commits mailing list