[PATCH] D27037: Introduce StringRefZ class to represent null-terminated string.
Rafael Avila de Espindola via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 28 22:35:25 PST 2016
The code changes are fine, I just wonder how we would handle the error
condition of a broken file that is missing a null terminator.
I guess we would use the "requires null terminator" option of
MemoryBuffer.
I will try to create a testcase to see if this is a realistic concern.
Rui Ueyama <ruiu at google.com> writes:
> ruiu updated this revision to Diff 79036.
> ruiu added a comment.
>
> - Inline a few functions.
>
>
> https://reviews.llvm.org/D27037
>
> Files:
> ELF/InputFiles.cpp
> ELF/LTO.cpp
> ELF/Strings.cpp
> ELF/Strings.h
> ELF/SymbolTable.cpp
> ELF/SymbolTable.h
> ELF/Symbols.cpp
> ELF/Symbols.h
>
> Index: ELF/Symbols.h
> ===================================================================
> --- ELF/Symbols.h
> +++ ELF/Symbols.h
> @@ -16,6 +16,7 @@
> #define LLD_ELF_SYMBOLS_H
>
> #include "InputSection.h"
> +#include "Strings.h"
>
> #include "lld/Core/LLVM.h"
> #include "llvm/Object/Archive.h"
> @@ -28,7 +29,6 @@
> class BitcodeFile;
> class InputFile;
> class LazyObjectFile;
> -class SymbolBody;
> template <class ELFT> class ObjectFile;
> template <class ELFT> class OutputSection;
> class OutputSectionBase;
> @@ -69,7 +69,7 @@
> bool isShared() const { return SymbolKind == SharedKind; }
> bool isLocal() const { return IsLocal; }
> bool isPreemptible() const;
> - StringRef getName() const;
> + StringRef getName() const { return Name; }
> uint8_t getVisibility() const { return StOther & 0x3; }
> void parseSymbolVersion();
>
> @@ -98,8 +98,8 @@
> uint32_t GlobalDynIndex = -1;
>
> protected:
> - SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
> - SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
> + SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
> + uint8_t Type);
>
> const unsigned SymbolKind : 8;
>
> @@ -136,17 +136,13 @@
> bool isFile() const { return Type == llvm::ELF::STT_FILE; }
>
> protected:
> - // Local symbols are not inserted to the symbol table, so we usually
> - // don't need their names at all. We read symbol names lazily if possible.
> - mutable uint32_t NameLen = (uint32_t)-1;
> - const char *Name;
> + StringRefZ Name;
> };
>
> // The base class for any defined symbols.
> class Defined : public SymbolBody {
> public:
> - Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type);
> - Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
> + Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type);
> static bool classof(const SymbolBody *S) { return S->isDefined(); }
> };
>
> @@ -175,25 +171,15 @@
> typedef typename ELFT::uint uintX_t;
>
> public:
> - DefinedRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value,
> - uintX_t Size, InputSectionBase<ELFT> *Section, InputFile *File)
> - : Defined(SymbolBody::DefinedRegularKind, Name, StOther, Type),
> + DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
> + uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section,
> + InputFile *File)
> + : Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type),
> Value(Value), Size(Size),
> Section(Section ? Section->Repl : NullInputSection) {
> this->File = File;
> }
>
> - DefinedRegular(const char *Name, const Elf_Sym &Sym,
> - InputSectionBase<ELFT> *Section)
> - : Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
> - Sym.getType()),
> - Value(Sym.st_value), Size(Sym.st_size),
> - Section(Section ? Section->Repl : NullInputSection) {
> - assert(isLocal());
> - if (Section)
> - this->File = Section->getFile();
> - }
> -
> // Return true if the symbol is a PIC function.
> bool isMipsPIC() const;
>
> @@ -248,8 +234,8 @@
>
> class Undefined : public SymbolBody {
> public:
> - Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F);
> - Undefined(const char *Name, uint8_t StOther, uint8_t Type, InputFile *F);
> + Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
> + InputFile *F);
>
> static bool classof(const SymbolBody *S) {
> return S->kind() == UndefinedKind;
> @@ -270,7 +256,8 @@
>
> SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
> const Elf_Verdef *Verdef)
> - : Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()),
> + : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other,
> + Sym.getType()),
> Sym(Sym), Verdef(Verdef) {
> // IFuncs defined in DSOs are treated as functions by the static linker.
> if (isGnuIFunc())
> @@ -309,7 +296,7 @@
>
> protected:
> Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
> - : SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {}
> + : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {}
> };
>
> // LazyArchive symbols represents symbols in archive files.
> Index: ELF/Symbols.cpp
> ===================================================================
> --- ELF/Symbols.cpp
> +++ ELF/Symbols.cpp
> @@ -91,22 +91,12 @@
> llvm_unreachable("invalid symbol kind");
> }
>
> -SymbolBody::SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
> - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true),
> +SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
> + uint8_t Type)
> + : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
> IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
> StOther(StOther), Name(Name) {}
>
> -SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
> - : SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
> - IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
> - StOther(StOther), NameLen(Name.size()), Name(Name.data()) {}
> -
> -StringRef SymbolBody::getName() const {
> - if (NameLen == (uint32_t)-1)
> - NameLen = strlen(Name);
> - return StringRef(Name, NameLen);
> -}
> -
> // Returns true if a symbol can be replaced at load-time by a symbol
> // with the same name defined in other ELF executable or DSO.
> bool SymbolBody::isPreemptible() const {
> @@ -202,7 +192,7 @@
> return;
>
> // Truncate the symbol name so that it doesn't include the version string.
> - NameLen = Pos;
> + Name = {S.data(), Pos};
>
> // '@@' in a symbol name means the default version.
> // It is usually the most recent one.
> @@ -225,40 +215,34 @@
> error("symbol " + S + " has undefined version " + Verstr);
> }
>
> -Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
> - : SymbolBody(K, Name, StOther, Type) {}
> -
> -Defined::Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
> - : SymbolBody(K, Name, StOther, Type) {}
> +Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
> + uint8_t Type)
> + : SymbolBody(K, Name, IsLocal, StOther, Type) {}
>
> template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
> if (!Section || !isFunc())
> return false;
> return (this->StOther & STO_MIPS_MIPS16) == STO_MIPS_PIC ||
> (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC);
> }
>
> -Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type,
> - InputFile *File)
> - : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
> - this->File = File;
> -}
> -
> -Undefined::Undefined(const char *Name, uint8_t StOther, uint8_t Type,
> - InputFile *File)
> - : SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
> +Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther,
> + uint8_t Type, InputFile *File)
> + : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) {
> this->File = File;
> }
>
> template <typename ELFT>
> -DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value,
> +DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef Name, uintX_t Value,
> const OutputSectionBase *Section)
> - : Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */),
> + : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false,
> + STV_HIDDEN, 0 /* Type */),
> Value(Value), Section(Section) {}
>
> -DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment,
> +DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment,
> uint8_t StOther, uint8_t Type, InputFile *File)
> - : Defined(SymbolBody::DefinedCommonKind, N, StOther, Type),
> + : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther,
> + Type),
> Alignment(Alignment), Size(Size) {
> this->File = File;
> }
> Index: ELF/SymbolTable.h
> ===================================================================
> --- ELF/SymbolTable.h
> +++ ELF/SymbolTable.h
> @@ -55,8 +55,9 @@
> uint8_t Visibility = llvm::ELF::STV_HIDDEN);
>
> Symbol *addUndefined(StringRef Name);
> - Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther,
> - uint8_t Type, bool CanOmitFromDynSym, InputFile *File);
> + Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
> + uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
> + InputFile *File);
>
> Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
> uintX_t Value, uintX_t Size, uint8_t Binding,
> Index: ELF/SymbolTable.cpp
> ===================================================================
> --- ELF/SymbolTable.cpp
> +++ ELF/SymbolTable.cpp
> @@ -235,22 +235,23 @@
> }
>
> template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
> - return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0,
> + return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
> + /*Type*/ 0,
> /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
> }
>
> template <class ELFT>
> -Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
> - uint8_t StOther, uint8_t Type,
> - bool CanOmitFromDynSym,
> +Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
> + uint8_t Binding, uint8_t StOther,
> + uint8_t Type, bool CanOmitFromDynSym,
> InputFile *File) {
> Symbol *S;
> bool WasInserted;
> std::tie(S, WasInserted) =
> insert(Name, Type, StOther & 3, CanOmitFromDynSym, File);
> if (WasInserted) {
> S->Binding = Binding;
> - replaceBody<Undefined>(S, Name, StOther, Type, File);
> + replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File);
> return S;
> }
> if (Binding != STB_WEAK) {
> @@ -378,8 +379,8 @@
> /*CanOmitFromDynSym*/ false, File);
> int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
> if (Cmp > 0)
> - replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, Value, Size,
> - Section, File);
> + replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
> + Value, Size, Section, File);
> else if (Cmp == 0)
> reportDuplicate(S->body(), Section, Value);
> return S;
> @@ -432,7 +433,8 @@
> insert(Name, Type, StOther & 3, CanOmitFromDynSym, F);
> int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
> if (Cmp > 0)
> - replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, 0, 0, nullptr, F);
> + replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
> + 0, 0, nullptr, F);
> else if (Cmp == 0)
> reportDuplicate(S->body(), F);
> return S;
> Index: ELF/Strings.h
> ===================================================================
> --- ELF/Strings.h
> +++ ELF/Strings.h
> @@ -26,6 +26,36 @@
> bool isValidCIdentifier(StringRef S);
> StringRef unquote(StringRef S);
>
> +// This is a lazy version of StringRef. String size is computed lazily
> +// when it is needed. It is more efficient than StringRef if you have
> +// a string whose size is unknown but could be very long.
> +//
> +// ELF string tables contain a lot of null-terminated strings.
> +// Most of them are not necessary for the linker because they are names
> +// of local symbols and the linker doesn't use local symbol names for
> +// name resolution. So, we use this class to represents strings read
> +// from string tables.
> +class StringRefZ {
> +public:
> + StringRefZ() : Start(nullptr), Size(0) {}
> + StringRefZ(const char *S, size_t Size) : Start(S), Size(Size) {}
> +
> + /*implicit*/ StringRefZ(const char *S) : Start(S), Size(-1) {}
> +
> + /*implicit*/ StringRefZ(llvm::StringRef S)
> + : Start(S.data()), Size(S.size()) {}
> +
> + operator llvm::StringRef() const {
> + if (Size == (size_t)-1)
> + Size = strlen(Start);
> + return {Start, Size};
> + }
> +
> +private:
> + const char *Start;
> + mutable size_t Size;
> +};
> +
> // This class represents a glob pattern. Supported metacharacters
> // are "*", "?", "[<chars>]" and "[^<chars>]".
> class GlobPattern {
> Index: ELF/Strings.cpp
> ===================================================================
> --- ELF/Strings.cpp
> +++ ELF/Strings.cpp
> @@ -16,6 +16,7 @@
> #include "llvm/Config/config.h"
> #include "llvm/Demangle/Demangle.h"
> #include <algorithm>
> +#include <cstring>
>
> using namespace llvm;
> using namespace lld;
> Index: ELF/LTO.cpp
> ===================================================================
> --- ELF/LTO.cpp
> +++ ELF/LTO.cpp
> @@ -97,8 +97,8 @@
> BitcodeCompiler::~BitcodeCompiler() = default;
>
> static void undefine(Symbol *S) {
> - replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type,
> - nullptr);
> + replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false,
> + STV_DEFAULT, S->body()->Type, nullptr);
> }
>
> void BitcodeCompiler::add(BitcodeFile &F) {
> Index: ELF/InputFiles.cpp
> ===================================================================
> --- ELF/InputFiles.cpp
> +++ ELF/InputFiles.cpp
> @@ -439,6 +439,11 @@
> int Binding = Sym->getBinding();
> InputSectionBase<ELFT> *Sec = getSection(*Sym);
>
> + uint8_t StOther = Sym->st_other;
> + uint8_t Type = Sym->getType();
> + uintX_t Value = Sym->st_value;
> + uintX_t Size = Sym->st_size;
> +
> if (Binding == STB_LOCAL) {
> if (Sym->getType() == STT_FILE)
> SourceFile = check(Sym->getName(this->StringTable));
> @@ -448,20 +453,19 @@
>
> const char *Name = this->StringTable.data() + Sym->st_name;
> if (Sym->st_shndx == SHN_UNDEF)
> - return new (BAlloc) Undefined(Name, Sym->st_other, Sym->getType(), this);
> - return new (BAlloc) DefinedRegular<ELFT>(Name, *Sym, Sec);
> + return new (BAlloc)
> + Undefined(Name, /*IsLocal=*/true, StOther, Type, this);
> +
> + return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther,
> + Type, Value, Size, Sec, this);
> }
>
> StringRef Name = check(Sym->getName(this->StringTable));
> - uint8_t StOther = Sym->st_other;
> - uint8_t Type = Sym->getType();
> - uintX_t Value = Sym->st_value;
> - uintX_t Size = Sym->st_size;
>
> switch (Sym->st_shndx) {
> case SHN_UNDEF:
> return elf::Symtab<ELFT>::X
> - ->addUndefined(Name, Binding, StOther, Type,
> + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
> /*CanOmitFromDynSym=*/false, this)
> ->body();
> case SHN_COMMON:
> @@ -481,7 +485,7 @@
> case STB_GNU_UNIQUE:
> if (Sec == &InputSection<ELFT>::Discarded)
> return elf::Symtab<ELFT>::X
> - ->addUndefined(Name, Binding, StOther, Type,
> + ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
> /*CanOmitFromDynSym=*/false, this)
> ->body();
> return elf::Symtab<ELFT>::X
> @@ -726,12 +730,14 @@
>
> int C = check(ObjSym.getComdatIndex());
> if (C != -1 && !KeptComdats[C])
> - return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
> - CanOmitFromDynSym, F);
> + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
> + Visibility, Type, CanOmitFromDynSym,
> + F);
>
> if (Flags & BasicSymbolRef::SF_Undefined)
> - return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
> - CanOmitFromDynSym, F);
> + return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
> + Visibility, Type, CanOmitFromDynSym,
> + F);
>
> if (Flags & BasicSymbolRef::SF_Common)
> return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(),
More information about the llvm-commits
mailing list