[PATCH] D38239: [ELF] - Define linkerscript symbols early.

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 22 14:07:45 PST 2017


defsym.ll was originally added with a change to implement --wrap for
LTO.

Producing a duplicated symbol error for both an explicit linker script
and --defsym seems reasonable. I will experiment with that a bit.

Cheers,
Rafael


George Rimar via Phabricator <reviews at reviews.llvm.org> writes:

> grimar updated this revision to Diff 128019.
> grimar edited the summary of this revision.
> grimar added a comment.
>
> - Reimplemented.
>
> I think correct solution is to add linker script symbols as undefined and
> set flag for them, showing we know they be defined/adjusted later by script. 
> That looks cleaner and allows to avod multiple definition error I mentioned earlier.
>
> Patch do this.
>
>
> https://reviews.llvm.org/D38239
>
> Files:
>   ELF/Driver.cpp
>   ELF/LTO.cpp
>   ELF/LinkerScript.cpp
>   ELF/LinkerScript.h
>   ELF/SymbolTable.cpp
>   ELF/Symbols.h
>   test/ELF/linkerscript/symbols-synthetic.s
>   test/ELF/linkerscript/version-script.s
>
> Index: test/ELF/linkerscript/version-script.s
> ===================================================================
> --- test/ELF/linkerscript/version-script.s
> +++ test/ELF/linkerscript/version-script.s
> @@ -0,0 +1,52 @@
> +# REQUIRES: x86
> +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
> +
> +# RUN: echo "bar = foo; VERSION { V { global: foo; bar; local: *; }; }" > %t.script
> +# RUN: ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so
> +# RUN: llvm-readobj -V %t.so | FileCheck %s
> +
> +## Check that we are able to version symbols defined in script.
> +# CHECK:      Symbols [
> +# CHECK-NEXT:   Symbol {
> +# CHECK-NEXT:     Version: 0
> +# CHECK-NEXT:     Name: @
> +# CHECK-NEXT:   }
> +# CHECK-NEXT:   Symbol {
> +# CHECK-NEXT:     Version: 0
> +# CHECK-NEXT:     Name: und@
> +# CHECK-NEXT:   }
> +# CHECK-NEXT:   Symbol {
> +# CHECK-NEXT:     Version: 2
> +# CHECK-NEXT:     Name: foo@@V
> +# CHECK-NEXT:   }
> +# CHECK-NEXT:   Symbol {
> +# CHECK-NEXT:     Version: 2
> +# CHECK-NEXT:     Name: bar@@V
> +# CHECK-NEXT:   }
> +# CHECK-NEXT: ]
> +
> +# RUN: echo "bar = und; VERSION { V { global: foo; bar; local: *; }; }" > %t.script
> +# RUN: not ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so \
> +# RUN:   2>&1 | FileCheck --check-prefix=ERR %s
> +# ERR: symbol not found: und
> +
> +# RUN: echo "und = 0x1; VERSION { V { global: und; local: *; }; }" > %t.script
> +# RUN: ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so
> +# RUN: llvm-readobj -V %t.so | FileCheck %s --check-prefix=UNDEF
> +# UNDEF:      Symbols [
> +# UNDEF-NEXT:   Symbol {
> +# UNDEF-NEXT:     Version: 0
> +# UNDEF-NEXT:     Name: @
> +# UNDEF-NEXT:   }
> +# UNDEF-NEXT:   Symbol {
> +# UNDEF-NEXT:     Version: 2
> +# UNDEF-NEXT:     Name: und@@V
> +# UNDEF-NEXT:   }
> +# UNDEF-NEXT: ]
> +
> +.global und
> +
> +.text
> +.globl foo
> +.type foo, at function
> +foo:
> Index: test/ELF/linkerscript/symbols-synthetic.s
> ===================================================================
> --- test/ELF/linkerscript/symbols-synthetic.s
> +++ test/ELF/linkerscript/symbols-synthetic.s
> @@ -61,14 +61,14 @@
>  # SIMPLE-NEXT: 0000000000000120         .foo    00000000 _begin_sec
>  # SIMPLE-NEXT: 0000000000000128         *ABS*   00000000 _end_sec_abs
>  # SIMPLE-NEXT: 0000000000001048         .text   00000000 _start
> +# SIMPLE-NEXT: 0000000000000ee4         *ABS*   00000000 size_foo_3
>  # SIMPLE-NEXT: 0000000000000120         .foo    00000000 begin_foo
>  # SIMPLE-NEXT: 0000000000000128         .foo    00000000 end_foo
>  # SIMPLE-NEXT: 0000000000000008         *ABS*   00000000 size_foo_1
>  # SIMPLE-NEXT: 0000000000000008         *ABS*   00000000 size_foo_1_abs
>  # SIMPLE-NEXT: 0000000000001000         .foo    00000000 begin_bar
>  # SIMPLE-NEXT: 0000000000001004         .foo    00000000 end_bar
>  # SIMPLE-NEXT: 0000000000000ee4         *ABS*   00000000 size_foo_2
> -# SIMPLE-NEXT: 0000000000000ee4         *ABS*   00000000 size_foo_3
>  # SIMPLE-NEXT: 0000000000001004         .eh_frame_hdr     00000000 __eh_frame_hdr_start
>  # SIMPLE-NEXT: 0000000000001010         *ABS*             00000000 __eh_frame_hdr_start2
>  # SIMPLE-NEXT: 0000000000001018         .eh_frame_hdr     00000000 __eh_frame_hdr_end
> Index: ELF/Symbols.h
> ===================================================================
> --- ELF/Symbols.h
> +++ ELF/Symbols.h
> @@ -205,6 +205,9 @@
>        : Symbol(UndefinedKind, File, Name, Binding, StOther, Type) {}
>  
>    static bool classof(const Symbol *S) { return S->kind() == UndefinedKind; }
> +
> +  // If set, means that symbol has definition in linker script.
> +  bool DefinedInLinkerScript;
>  };
>  
>  class SharedSymbol : public Symbol {
> Index: ELF/SymbolTable.cpp
> ===================================================================
> --- ELF/SymbolTable.cpp
> +++ ELF/SymbolTable.cpp
> @@ -612,6 +612,16 @@
>    }
>  }
>  
> +// We can version only defined symbols and those undefined that
> +// are known to be defined in linker script.
> +static bool canBeVersioned(Symbol *Sym) {
> +  if (Sym->isDefined())
> +    return true;
> +  if (auto *U = dyn_cast<Undefined>(Sym))
> +    return U->DefinedInLinkerScript;
> +  return false;
> +}
> +
>  // Initialize DemangledSyms with a map from demangled symbols to symbol
>  // objects. Used to handle "extern C++" directive in version scripts.
>  //
> @@ -629,7 +639,7 @@
>    if (!DemangledSyms) {
>      DemangledSyms.emplace();
>      for (Symbol *Sym : SymVector) {
> -      if (!Sym->isDefined())
> +      if (!canBeVersioned(Sym))
>          continue;
>        if (Optional<std::string> S = demangleItanium(Sym->getName()))
>          (*DemangledSyms)[*S].push_back(Sym);
> @@ -644,7 +654,7 @@
>    if (Ver.IsExternCpp)
>      return getDemangledSyms().lookup(Ver.Name);
>    if (Symbol *B = find(Ver.Name))
> -    if (B->isDefined())
> +    if (canBeVersioned(B))
>        return {B};
>    return {};
>  }
> @@ -661,7 +671,7 @@
>    }
>  
>    for (Symbol *Sym : SymVector)
> -    if (Sym->isDefined() && M.match(Sym->getName()))
> +    if (canBeVersioned(Sym) && M.match(Sym->getName()))
>        Res.push_back(Sym);
>    return Res;
>  }
> Index: ELF/LinkerScript.h
> ===================================================================
> --- ELF/LinkerScript.h
> +++ ELF/LinkerScript.h
> @@ -84,7 +84,7 @@
>    int Kind;
>  };
>  
> -// This represents ". = <expr>" or "<symbol> = <expr>".
> +// This represents ". = <expr>" or "<symbol> = <expr>" or -defsym=<expr>.
>  struct SymbolAssignment : BaseCommand {
>    SymbolAssignment(StringRef Name, Expr E, std::string Loc)
>        : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
> @@ -95,7 +95,7 @@
>  
>    // The LHS of an expression. Name is either a symbol name or ".".
>    StringRef Name;
> -  Defined *Sym = nullptr;
> +  Symbol *Sym = nullptr;
>  
>    // The RHS of an expression.
>    Expr Expression;
> @@ -207,6 +207,7 @@
>    llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection;
>  
>    void addSymbol(SymbolAssignment *Cmd);
> +  bool shouldDefineSym(SymbolAssignment *Cmd);
>    void assignSymbol(SymbolAssignment *Cmd, bool InSec);
>    void setDot(Expr E, const Twine &Loc, bool InSec);
>  
> @@ -263,6 +264,8 @@
>    void allocateHeaders(std::vector<PhdrEntry *> &Phdrs);
>    void processSectionCommands();
>  
> +  void declareSymbols();
> +
>    // SECTIONS command list.
>    std::vector<BaseCommand *> SectionCommands;
>  
> Index: ELF/LinkerScript.cpp
> ===================================================================
> --- ELF/LinkerScript.cpp
> +++ ELF/LinkerScript.cpp
> @@ -117,15 +117,6 @@
>  // This function is called from processSectionCommands,
>  // while we are fixing the output section layout.
>  void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
> -  if (Cmd->Name == ".")
> -    return;
> -
> -  // If a symbol was in PROVIDE(), we need to define it only when
> -  // it is a referenced undefined symbol.
> -  Symbol *B = Symtab->find(Cmd->Name);
> -  if (Cmd->Provide && (!B || B->isDefined()))
> -    return;
> -
>    // Define a symbol.
>    Symbol *Sym;
>    uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
> @@ -153,6 +144,49 @@
>    Cmd->Sym = cast<Defined>(Sym);
>  }
>  
> +// Used for handling linker symbol assignments, for both finalizing
> +// their values and doing early declarations. Returns true if symbol
> +// should be defined from linker script.
> +bool LinkerScript::shouldDefineSym(SymbolAssignment *Cmd) {
> +  if (Cmd->Name == ".")
> +    return false;
> +
> +  if (!Cmd->Provide)
> +    return true;
> +
> +  // If a symbol was in PROVIDE(), we need to define it only
> +  // when it is a referenced undefined symbol.
> +  Symbol *B = Symtab->find(Cmd->Name);
> +  if (!B || B->isDefined())
> +    return false;
> +  return true;
> +}
> +
> +// Symbols defined in script should not be inlined by LTO. At the same time
> +// we don't know their final values until late stages of link. Here we scan
> +// over symbol assignment commands and create undefined symbols if needed.
> +void LinkerScript::declareSymbols() {
> +  assert(!Ctx);
> +  for (BaseCommand *Base : SectionCommands) {
> +    auto *Cmd = dyn_cast<SymbolAssignment>(Base);
> +    if (!Cmd || !shouldDefineSym(Cmd))
> +      continue;
> +
> +    bool Inserted;
> +    std::tie(Cmd->Sym, Inserted) =
> +        Symtab->insert(Cmd->Name, STB_GLOBAL, STV_DEFAULT,
> +                       /*CanOmitFromDynSym*/ false, nullptr);
> +    Cmd->Sym->CanInline = false;
> +
> +    if (Inserted)
> +      replaceSymbol<Undefined>(Cmd->Sym, nullptr /*File*/, Cmd->Name,
> +                               STB_GLOBAL, STV_DEFAULT, STT_NOTYPE);
> +
> +    if (auto *U = dyn_cast<Undefined>(Cmd->Sym))
> +      U->DefinedInLinkerScript = true;
> +  }
> +}
> +
>  // This function is called from assignAddresses, while we are
>  // fixing the output section addresses. This function is supposed
>  // to set the final value for a given symbol assignment.
> @@ -165,13 +199,14 @@
>    if (!Cmd->Sym)
>      return;
>  
> +  Defined *Sym = cast<Defined>(Cmd->Sym);
>    ExprValue V = Cmd->Expression();
>    if (V.isAbsolute()) {
> -    Cmd->Sym->Section = nullptr;
> -    Cmd->Sym->Value = V.getValue();
> +    Sym->Section = nullptr;
> +    Sym->Value = V.getValue();
>    } else {
> -    Cmd->Sym->Section = V.Sec;
> -    Cmd->Sym->Value = V.getSectionOffset();
> +    Sym->Section = V.Sec;
> +    Sym->Value = V.getSectionOffset();
>    }
>  }
>  
> @@ -364,7 +399,8 @@
>    for (BaseCommand *Base : SectionCommands) {
>      // Handle symbol assignments outside of any output section.
>      if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
> -      addSymbol(Cmd);
> +      if (shouldDefineSym(Cmd))
> +        addSymbol(Cmd);
>        continue;
>      }
>  
> @@ -396,7 +432,8 @@
>        // ".foo : { ...; bar = .; }". Handle them.
>        for (BaseCommand *Base : Sec->SectionCommands)
>          if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base))
> -          addSymbol(OutCmd);
> +          if (shouldDefineSym(OutCmd))
> +            addSymbol(OutCmd);
>  
>        // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
>        // is given, input sections are aligned to that value, whether the
> Index: ELF/LTO.cpp
> ===================================================================
> --- ELF/LTO.cpp
> +++ ELF/LTO.cpp
> @@ -129,11 +129,6 @@
>    std::vector<Symbol *> Syms = F.getSymbols();
>    std::vector<lto::SymbolResolution> Resols(Syms.size());
>  
> -  DenseSet<StringRef> ScriptSymbols;
> -  for (BaseCommand *Base : Script->SectionCommands)
> -    if (auto *Cmd = dyn_cast<SymbolAssignment>(Base))
> -      ScriptSymbols.insert(Cmd->Name);
> -
>    // Provide a resolution to the LTO API for each symbol.
>    for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) {
>      Symbol *Sym = Syms[SymNum];
> @@ -164,7 +159,7 @@
>      // still not final:
>      // 1) Aliased (with --defsym) or wrapped (with --wrap) symbols.
>      // 2) Symbols redefined in linker script.
> -    R.LinkerRedefined = !Sym->CanInline || ScriptSymbols.count(Sym->getName());
> +    R.LinkerRedefined = !Sym->CanInline;
>    }
>    checkError(LTOObj->add(std::move(F.Obj), Resols));
>  }
> Index: ELF/Driver.cpp
> ===================================================================
> --- ELF/Driver.cpp
> +++ ELF/Driver.cpp
> @@ -1029,6 +1029,9 @@
>    for (StringRef Sym : Script->ReferencedSymbols)
>      Symtab->addUndefined<ELFT>(Sym);
>  
> +  // We want to declare linker script's symbols early, so that can version them.
> +  Script->declareSymbols();
> +
>    // Handle the `--undefined <sym>` options.
>    for (StringRef S : Config->Undefined)
>      Symtab->fetchIfLazy<ELFT>(S);


More information about the llvm-commits mailing list