[PATCH] D27037: Introduce StringRefZ class to represent null-terminated string.

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 28 22:35:25 PST 2016


The code changes are fine, I just wonder how we would handle the error
condition of a broken file that is missing a null terminator.

I guess we would use the "requires null terminator" option of
MemoryBuffer.

I will try to create a testcase to see if this is a realistic concern.


Rui Ueyama <ruiu at google.com> writes:

> ruiu updated this revision to Diff 79036.
> ruiu added a comment.
>
> - Inline a few functions.
>
>
> https://reviews.llvm.org/D27037
>
> Files:
>   ELF/InputFiles.cpp
>   ELF/LTO.cpp
>   ELF/Strings.cpp
>   ELF/Strings.h
>   ELF/SymbolTable.cpp
>   ELF/SymbolTable.h
>   ELF/Symbols.cpp
>   ELF/Symbols.h
>
> Index: ELF/Symbols.h
> ===================================================================
> --- ELF/Symbols.h
> +++ ELF/Symbols.h
> @@ -16,6 +16,7 @@
>  #define LLD_ELF_SYMBOLS_H
>  
>  #include "InputSection.h"
> +#include "Strings.h"
>  
>  #include "lld/Core/LLVM.h"
>  #include "llvm/Object/Archive.h"
> @@ -28,7 +29,6 @@
>  class BitcodeFile;
>  class InputFile;
>  class LazyObjectFile;
> -class SymbolBody;
>  template <class ELFT> class ObjectFile;
>  template <class ELFT> class OutputSection;
>  class OutputSectionBase;
> @@ -69,7 +69,7 @@
>    bool isShared() const { return SymbolKind == SharedKind; }
>    bool isLocal() const { return IsLocal; }
>    bool isPreemptible() const;
> -  StringRef getName() const;
> +  StringRef getName() const { return Name; }
>    uint8_t getVisibility() const { return StOther & 0x3; }
>    void parseSymbolVersion();
>  
> @@ -98,8 +98,8 @@
>    uint32_t GlobalDynIndex = -1;
>  
>  protected:
> -  SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
> -  SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
> +  SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
> +             uint8_t Type);
>  
>    const unsigned SymbolKind : 8;
>  
> @@ -136,17 +136,13 @@
>    bool isFile() const { return Type == llvm::ELF::STT_FILE; }
>  
>  protected:
> -  // Local symbols are not inserted to the symbol table, so we usually
> -  // don't need their names at all. We read symbol names lazily if possible.
> -  mutable uint32_t NameLen = (uint32_t)-1;
> -  const char *Name;
> +  StringRefZ Name;
>  };
>  
>  // The base class for any defined symbols.
>  class Defined : public SymbolBody {
>  public:
> -  Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
> -  Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
> +  Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type);
>    static bool classof(const SymbolBody *S) { return S->isDefined(); }
>  };
>  
> @@ -175,25 +171,15 @@
>    typedef typename ELFT::uint uintX_t;
>  
>  public:
> -  DefinedRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value,
> -                 uintX_t Size, InputSectionBase<ELFT> *Section, InputFile *File)
> -      : Defined(SymbolBody::DefinedRegularKind, Name, StOther, Type),
> +  DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
> +                 uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section,
> +                 InputFile *File)
> +      : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type),
>          Value(Value), Size(Size),
>          Section(Section ? Section->Repl : NullInputSection) {
>      this->File = File;
>    }
>  
> -  DefinedRegular(const char *Name, const Elf_Sym &Sym,
> -                 InputSectionBase<ELFT> *Section)
> -      : Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
> -                Sym.getType()),
> -        Value(Sym.st_value), Size(Sym.st_size),
> -        Section(Section ? Section->Repl : NullInputSection) {
> -    assert(isLocal());
> -    if (Section)
> -      this->File = Section->getFile();
> -  }
> -
>    // Return true if the symbol is a PIC function.
>    bool isMipsPIC() const;
>  
> @@ -248,8 +234,8 @@
>  
>  class Undefined : public SymbolBody {
>  public:
> -  Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F);
> -  Undefined(const char *Name, uint8_t StOther, uint8_t Type, InputFile *F);
> +  Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
> +            InputFile *F);
>  
>    static bool classof(const SymbolBody *S) {
>      return S->kind() == UndefinedKind;
> @@ -270,7 +256,8 @@
>  
>    SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
>                 const Elf_Verdef *Verdef)
> -      : Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()),
> +      : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other,
> +                Sym.getType()),
>          Sym(Sym), Verdef(Verdef) {
>      // IFuncs defined in DSOs are treated as functions by the static linker.
>      if (isGnuIFunc())
> @@ -309,7 +296,7 @@
>  
>  protected:
>    Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
> -      : SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {}
> +      : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {}
>  };
>  
>  // LazyArchive symbols represents symbols in archive files.
> Index: ELF/Symbols.cpp
> ===================================================================
> --- ELF/Symbols.cpp
> +++ ELF/Symbols.cpp
> @@ -91,22 +91,12 @@
>    llvm_unreachable("invalid symbol kind");
>  }
>  
> -SymbolBody::SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
> -    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true),
> +SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
> +                       uint8_t Type)
> +    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
>        IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
>        StOther(StOther), Name(Name) {}
>  
> -SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
> -    : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
> -      IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
> -      StOther(StOther), NameLen(Name.size()), Name(Name.data()) {}
> -
> -StringRef SymbolBody::getName() const {
> -  if (NameLen == (uint32_t)-1)
> -    NameLen = strlen(Name);
> -  return StringRef(Name, NameLen);
> -}
> -
>  // Returns true if a symbol can be replaced at load-time by a symbol
>  // with the same name defined in other ELF executable or DSO.
>  bool SymbolBody::isPreemptible() const {
> @@ -202,7 +192,7 @@
>      return;
>  
>    // Truncate the symbol name so that it doesn't include the version string.
> -  NameLen = Pos;
> +  Name = {S.data(), Pos};
>  
>    // '@@' in a symbol name means the default version.
>    // It is usually the most recent one.
> @@ -225,40 +215,34 @@
>    error("symbol " + S + " has undefined version " + Verstr);
>  }
>  
> -Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
> -    : SymbolBody(K, Name, StOther, Type) {}
> -
> -Defined::Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
> -    : SymbolBody(K, Name, StOther, Type) {}
> +Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
> +                 uint8_t Type)
> +    : SymbolBody(K, Name, IsLocal, StOther, Type) {}
>  
>  template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
>    if (!Section || !isFunc())
>      return false;
>    return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC ||
>           (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC);
>  }
>  
> -Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type,
> -                     InputFile *File)
> -    : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
> -  this->File = File;
> -}
> -
> -Undefined::Undefined(const char *Name, uint8_t StOther, uint8_t Type,
> -                     InputFile *File)
> -    : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
> +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther,
> +                     uint8_t Type, InputFile *File)
> +    : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) {
>    this->File = File;
>  }
>  
>  template <typename ELFT>
> -DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
> +DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef Name, uintX_t Value,
>                                           const OutputSectionBase *Section)
> -    : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
> +    : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false,
> +              STV_HIDDEN, 0 /* Type */),
>        Value(Value), Section(Section) {}
>  
> -DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
> +DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment,
>                               uint8_t StOther, uint8_t Type, InputFile *File)
> -    : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
> +    : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther,
> +              Type),
>        Alignment(Alignment), Size(Size) {
>    this->File = File;
>  }
> Index: ELF/SymbolTable.h
> ===================================================================
> --- ELF/SymbolTable.h
> +++ ELF/SymbolTable.h
> @@ -55,8 +55,9 @@
>                                     uint8_t Visibility = llvm::ELF::STV_HIDDEN);
>  
>    Symbol *addUndefined(StringRef Name);
> -  Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
> -                       uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
> +  Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
> +                       uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
> +                       InputFile *File);
>  
>    Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
>                       uintX_t Value, uintX_t Size, uint8_t Binding,
> Index: ELF/SymbolTable.cpp
> ===================================================================
> --- ELF/SymbolTable.cpp
> +++ ELF/SymbolTable.cpp
> @@ -235,22 +235,23 @@
>  }
>  
>  template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
> -  return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0,
> +  return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
> +                      /*Type*/ 0,
>                        /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
>  }
>  
>  template <class ELFT>
> -Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
> -                                        uint8_t StOther, uint8_t Type,
> -                                        bool CanOmitFromDynSym,
> +Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
> +                                        uint8_t Binding, uint8_t StOther,
> +                                        uint8_t Type, bool CanOmitFromDynSym,
>                                          InputFile *File) {
>    Symbol *S;
>    bool WasInserted;
>    std::tie(S, WasInserted) =
>        insert(Name, Type, StOther & 3, CanOmitFromDynSym, File);
>    if (WasInserted) {
>      S->Binding = Binding;
> -    replaceBody<Undefined>(S, Name, StOther, Type, File);
> +    replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File);
>      return S;
>    }
>    if (Binding != STB_WEAK) {
> @@ -378,8 +379,8 @@
>                                      /*CanOmitFromDynSym*/ false, File);
>    int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
>    if (Cmp > 0)
> -    replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, Value, Size,
> -                                      Section, File);
> +    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
> +                                      Value, Size, Section, File);
>    else if (Cmp == 0)
>      reportDuplicate(S->body(), Section, Value);
>    return S;
> @@ -432,7 +433,8 @@
>        insert(Name, Type, StOther & 3, CanOmitFromDynSym, F);
>    int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
>    if (Cmp > 0)
> -    replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, 0, 0, nullptr, F);
> +    replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
> +                                      0, 0, nullptr, F);
>    else if (Cmp == 0)
>      reportDuplicate(S->body(), F);
>    return S;
> Index: ELF/Strings.h
> ===================================================================
> --- ELF/Strings.h
> +++ ELF/Strings.h
> @@ -26,6 +26,36 @@
>  bool isValidCIdentifier(StringRef S);
>  StringRef unquote(StringRef S);
>  
> +// This is a lazy version of StringRef. String size is computed lazily
> +// when it is needed. It is more efficient than StringRef if you have
> +// a string whose size is unknown but could be very long.
> +//
> +// ELF string tables contain a lot of null-terminated strings.
> +// Most of them are not necessary for the linker because they are names
> +// of local symbols and the linker doesn't use local symbol names for
> +// name resolution. So, we use this class to represents strings read
> +// from string tables.
> +class StringRefZ {
> +public:
> +  StringRefZ() : Start(nullptr), Size(0) {}
> +  StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {}
> +
> +  /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {}
> +
> +  /*implicit*/ StringRefZ(llvm::StringRef S)
> +      : Start(S.data()), Size(S.size()) {}
> +
> +  operator llvm::StringRef() const {
> +    if (Size == (size_t)-1)
> +      Size = strlen(Start);
> +    return {Start, Size};
> +  }
> +
> +private:
> +  const char *Start;
> +  mutable size_t Size;
> +};
> +
>  // This class represents a glob pattern. Supported metacharacters
>  // are "*", "?", "[<chars>]" and "[^<chars>]".
>  class GlobPattern {
> Index: ELF/Strings.cpp
> ===================================================================
> --- ELF/Strings.cpp
> +++ ELF/Strings.cpp
> @@ -16,6 +16,7 @@
>  #include "llvm/Config/config.h"
>  #include "llvm/Demangle/Demangle.h"
>  #include <algorithm>
> +#include <cstring>
>  
>  using namespace llvm;
>  using namespace lld;
> Index: ELF/LTO.cpp
> ===================================================================
> --- ELF/LTO.cpp
> +++ ELF/LTO.cpp
> @@ -97,8 +97,8 @@
>  BitcodeCompiler::~BitcodeCompiler() = default;
>  
>  static void undefine(Symbol *S) {
> -  replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type,
> -                         nullptr);
> +  replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false,
> +                         STV_DEFAULT, S->body()->Type, nullptr);
>  }
>  
>  void BitcodeCompiler::add(BitcodeFile &F) {
> Index: ELF/InputFiles.cpp
> ===================================================================
> --- ELF/InputFiles.cpp
> +++ ELF/InputFiles.cpp
> @@ -439,6 +439,11 @@
>    int Binding = Sym->getBinding();
>    InputSectionBase<ELFT> *Sec = getSection(*Sym);
>  
> +  uint8_t StOther = Sym->st_other;
> +  uint8_t Type = Sym->getType();
> +  uintX_t Value = Sym->st_value;
> +  uintX_t Size = Sym->st_size;
> +
>    if (Binding == STB_LOCAL) {
>      if (Sym->getType() == STT_FILE)
>        SourceFile = check(Sym->getName(this->StringTable));
> @@ -448,20 +453,19 @@
>  
>      const char *Name = this->StringTable.data() + Sym->st_name;
>      if (Sym->st_shndx == SHN_UNDEF)
> -      return new (BAlloc) Undefined(Name, Sym->st_other, Sym->getType(), this);
> -    return new (BAlloc) DefinedRegular<ELFT>(Name, *Sym, Sec);
> +      return new (BAlloc)
> +          Undefined(Name, /*IsLocal=*/true, StOther, Type, this);
> +
> +    return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther,
> +                                             Type, Value, Size, Sec, this);
>    }
>  
>    StringRef Name = check(Sym->getName(this->StringTable));
> -  uint8_t StOther = Sym->st_other;
> -  uint8_t Type = Sym->getType();
> -  uintX_t Value = Sym->st_value;
> -  uintX_t Size = Sym->st_size;
>  
>    switch (Sym->st_shndx) {
>    case SHN_UNDEF:
>      return elf::Symtab<ELFT>::X
> -        ->addUndefined(Name, Binding, StOther, Type,
> +        ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
>                         /*CanOmitFromDynSym=*/false, this)
>          ->body();
>    case SHN_COMMON:
> @@ -481,7 +485,7 @@
>    case STB_GNU_UNIQUE:
>      if (Sec == &InputSection<ELFT>::Discarded)
>        return elf::Symtab<ELFT>::X
> -          ->addUndefined(Name, Binding, StOther, Type,
> +          ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
>                           /*CanOmitFromDynSym=*/false, this)
>            ->body();
>      return elf::Symtab<ELFT>::X
> @@ -726,12 +730,14 @@
>  
>    int C = check(ObjSym.getComdatIndex());
>    if (C != -1 && !KeptComdats[C])
> -    return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
> -                                         CanOmitFromDynSym, F);
> +    return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
> +                                         Visibility, Type, CanOmitFromDynSym,
> +                                         F);
>  
>    if (Flags & BasicSymbolRef::SF_Undefined)
> -    return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
> -                                         CanOmitFromDynSym, F);
> +    return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
> +                                         Visibility, Type, CanOmitFromDynSym,
> +                                         F);
>  
>    if (Flags & BasicSymbolRef::SF_Common)
>      return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(),


More information about the llvm-commits mailing list