[PATCH] D38239: [ELF] - Define linkerscript symbols early.
Rafael Avila de Espindola via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 22 14:07:45 PST 2017
defsym.ll was originally added with a change to implement --wrap for
LTO.
Producing a duplicated symbol error for both an explicit linker script
and --defsym seems reasonable. I will experiment with that a bit.
Cheers,
Rafael
George Rimar via Phabricator <reviews at reviews.llvm.org> writes:
> grimar updated this revision to Diff 128019.
> grimar edited the summary of this revision.
> grimar added a comment.
>
> - Reimplemented.
>
> I think correct solution is to add linker script symbols as undefined and
> set flag for them, showing we know they be defined/adjusted later by script.
> That looks cleaner and allows to avod multiple definition error I mentioned earlier.
>
> Patch do this.
>
>
> https://reviews.llvm.org/D38239
>
> Files:
> ELF/Driver.cpp
> ELF/LTO.cpp
> ELF/LinkerScript.cpp
> ELF/LinkerScript.h
> ELF/SymbolTable.cpp
> ELF/Symbols.h
> test/ELF/linkerscript/symbols-synthetic.s
> test/ELF/linkerscript/version-script.s
>
> Index: test/ELF/linkerscript/version-script.s
> ===================================================================
> --- test/ELF/linkerscript/version-script.s
> +++ test/ELF/linkerscript/version-script.s
> @@ -0,0 +1,52 @@
> +# REQUIRES: x86
> +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
> +
> +# RUN: echo "bar = foo; VERSION { V { global: foo; bar; local: *; }; }" > %t.script
> +# RUN: ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so
> +# RUN: llvm-readobj -V %t.so | FileCheck %s
> +
> +## Check that we are able to version symbols defined in script.
> +# CHECK: Symbols [
> +# CHECK-NEXT: Symbol {
> +# CHECK-NEXT: Version: 0
> +# CHECK-NEXT: Name: @
> +# CHECK-NEXT: }
> +# CHECK-NEXT: Symbol {
> +# CHECK-NEXT: Version: 0
> +# CHECK-NEXT: Name: und@
> +# CHECK-NEXT: }
> +# CHECK-NEXT: Symbol {
> +# CHECK-NEXT: Version: 2
> +# CHECK-NEXT: Name: foo@@V
> +# CHECK-NEXT: }
> +# CHECK-NEXT: Symbol {
> +# CHECK-NEXT: Version: 2
> +# CHECK-NEXT: Name: bar@@V
> +# CHECK-NEXT: }
> +# CHECK-NEXT: ]
> +
> +# RUN: echo "bar = und; VERSION { V { global: foo; bar; local: *; }; }" > %t.script
> +# RUN: not ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so \
> +# RUN: 2>&1 | FileCheck --check-prefix=ERR %s
> +# ERR: symbol not found: und
> +
> +# RUN: echo "und = 0x1; VERSION { V { global: und; local: *; }; }" > %t.script
> +# RUN: ld.lld -T %t.script -shared --no-undefined-version %t.o -o %t.so
> +# RUN: llvm-readobj -V %t.so | FileCheck %s --check-prefix=UNDEF
> +# UNDEF: Symbols [
> +# UNDEF-NEXT: Symbol {
> +# UNDEF-NEXT: Version: 0
> +# UNDEF-NEXT: Name: @
> +# UNDEF-NEXT: }
> +# UNDEF-NEXT: Symbol {
> +# UNDEF-NEXT: Version: 2
> +# UNDEF-NEXT: Name: und@@V
> +# UNDEF-NEXT: }
> +# UNDEF-NEXT: ]
> +
> +.global und
> +
> +.text
> +.globl foo
> +.type foo, at function
> +foo:
> Index: test/ELF/linkerscript/symbols-synthetic.s
> ===================================================================
> --- test/ELF/linkerscript/symbols-synthetic.s
> +++ test/ELF/linkerscript/symbols-synthetic.s
> @@ -61,14 +61,14 @@
> # SIMPLE-NEXT: 0000000000000120 .foo 00000000 _begin_sec
> # SIMPLE-NEXT: 0000000000000128 *ABS* 00000000 _end_sec_abs
> # SIMPLE-NEXT: 0000000000001048 .text 00000000 _start
> +# SIMPLE-NEXT: 0000000000000ee4 *ABS* 00000000 size_foo_3
> # SIMPLE-NEXT: 0000000000000120 .foo 00000000 begin_foo
> # SIMPLE-NEXT: 0000000000000128 .foo 00000000 end_foo
> # SIMPLE-NEXT: 0000000000000008 *ABS* 00000000 size_foo_1
> # SIMPLE-NEXT: 0000000000000008 *ABS* 00000000 size_foo_1_abs
> # SIMPLE-NEXT: 0000000000001000 .foo 00000000 begin_bar
> # SIMPLE-NEXT: 0000000000001004 .foo 00000000 end_bar
> # SIMPLE-NEXT: 0000000000000ee4 *ABS* 00000000 size_foo_2
> -# SIMPLE-NEXT: 0000000000000ee4 *ABS* 00000000 size_foo_3
> # SIMPLE-NEXT: 0000000000001004 .eh_frame_hdr 00000000 __eh_frame_hdr_start
> # SIMPLE-NEXT: 0000000000001010 *ABS* 00000000 __eh_frame_hdr_start2
> # SIMPLE-NEXT: 0000000000001018 .eh_frame_hdr 00000000 __eh_frame_hdr_end
> Index: ELF/Symbols.h
> ===================================================================
> --- ELF/Symbols.h
> +++ ELF/Symbols.h
> @@ -205,6 +205,9 @@
> : Symbol(UndefinedKind, File, Name, Binding, StOther, Type) {}
>
> static bool classof(const Symbol *S) { return S->kind() == UndefinedKind; }
> +
> + // If set, means that symbol has definition in linker script.
> + bool DefinedInLinkerScript;
> };
>
> class SharedSymbol : public Symbol {
> Index: ELF/SymbolTable.cpp
> ===================================================================
> --- ELF/SymbolTable.cpp
> +++ ELF/SymbolTable.cpp
> @@ -612,6 +612,16 @@
> }
> }
>
> +// We can version only defined symbols and those undefined that
> +// are known to be defined in linker script.
> +static bool canBeVersioned(Symbol *Sym) {
> + if (Sym->isDefined())
> + return true;
> + if (auto *U = dyn_cast<Undefined>(Sym))
> + return U->DefinedInLinkerScript;
> + return false;
> +}
> +
> // Initialize DemangledSyms with a map from demangled symbols to symbol
> // objects. Used to handle "extern C++" directive in version scripts.
> //
> @@ -629,7 +639,7 @@
> if (!DemangledSyms) {
> DemangledSyms.emplace();
> for (Symbol *Sym : SymVector) {
> - if (!Sym->isDefined())
> + if (!canBeVersioned(Sym))
> continue;
> if (Optional<std::string> S = demangleItanium(Sym->getName()))
> (*DemangledSyms)[*S].push_back(Sym);
> @@ -644,7 +654,7 @@
> if (Ver.IsExternCpp)
> return getDemangledSyms().lookup(Ver.Name);
> if (Symbol *B = find(Ver.Name))
> - if (B->isDefined())
> + if (canBeVersioned(B))
> return {B};
> return {};
> }
> @@ -661,7 +671,7 @@
> }
>
> for (Symbol *Sym : SymVector)
> - if (Sym->isDefined() && M.match(Sym->getName()))
> + if (canBeVersioned(Sym) && M.match(Sym->getName()))
> Res.push_back(Sym);
> return Res;
> }
> Index: ELF/LinkerScript.h
> ===================================================================
> --- ELF/LinkerScript.h
> +++ ELF/LinkerScript.h
> @@ -84,7 +84,7 @@
> int Kind;
> };
>
> -// This represents ". = <expr>" or "<symbol> = <expr>".
> +// This represents ". = <expr>" or "<symbol> = <expr>" or -defsym=<expr>.
> struct SymbolAssignment : BaseCommand {
> SymbolAssignment(StringRef Name, Expr E, std::string Loc)
> : BaseCommand(AssignmentKind), Name(Name), Expression(E), Location(Loc) {}
> @@ -95,7 +95,7 @@
>
> // The LHS of an expression. Name is either a symbol name or ".".
> StringRef Name;
> - Defined *Sym = nullptr;
> + Symbol *Sym = nullptr;
>
> // The RHS of an expression.
> Expr Expression;
> @@ -207,6 +207,7 @@
> llvm::DenseMap<StringRef, OutputSection *> NameToOutputSection;
>
> void addSymbol(SymbolAssignment *Cmd);
> + bool shouldDefineSym(SymbolAssignment *Cmd);
> void assignSymbol(SymbolAssignment *Cmd, bool InSec);
> void setDot(Expr E, const Twine &Loc, bool InSec);
>
> @@ -263,6 +264,8 @@
> void allocateHeaders(std::vector<PhdrEntry *> &Phdrs);
> void processSectionCommands();
>
> + void declareSymbols();
> +
> // SECTIONS command list.
> std::vector<BaseCommand *> SectionCommands;
>
> Index: ELF/LinkerScript.cpp
> ===================================================================
> --- ELF/LinkerScript.cpp
> +++ ELF/LinkerScript.cpp
> @@ -117,15 +117,6 @@
> // This function is called from processSectionCommands,
> // while we are fixing the output section layout.
> void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
> - if (Cmd->Name == ".")
> - return;
> -
> - // If a symbol was in PROVIDE(), we need to define it only when
> - // it is a referenced undefined symbol.
> - Symbol *B = Symtab->find(Cmd->Name);
> - if (Cmd->Provide && (!B || B->isDefined()))
> - return;
> -
> // Define a symbol.
> Symbol *Sym;
> uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT;
> @@ -153,6 +144,49 @@
> Cmd->Sym = cast<Defined>(Sym);
> }
>
> +// Used for handling linker symbol assignments, for both finalizing
> +// their values and doing early declarations. Returns true if symbol
> +// should be defined from linker script.
> +bool LinkerScript::shouldDefineSym(SymbolAssignment *Cmd) {
> + if (Cmd->Name == ".")
> + return false;
> +
> + if (!Cmd->Provide)
> + return true;
> +
> + // If a symbol was in PROVIDE(), we need to define it only
> + // when it is a referenced undefined symbol.
> + Symbol *B = Symtab->find(Cmd->Name);
> + if (!B || B->isDefined())
> + return false;
> + return true;
> +}
> +
> +// Symbols defined in script should not be inlined by LTO. At the same time
> +// we don't know their final values until late stages of link. Here we scan
> +// over symbol assignment commands and create undefined symbols if needed.
> +void LinkerScript::declareSymbols() {
> + assert(!Ctx);
> + for (BaseCommand *Base : SectionCommands) {
> + auto *Cmd = dyn_cast<SymbolAssignment>(Base);
> + if (!Cmd || !shouldDefineSym(Cmd))
> + continue;
> +
> + bool Inserted;
> + std::tie(Cmd->Sym, Inserted) =
> + Symtab->insert(Cmd->Name, STB_GLOBAL, STV_DEFAULT,
> + /*CanOmitFromDynSym*/ false, nullptr);
> + Cmd->Sym->CanInline = false;
> +
> + if (Inserted)
> + replaceSymbol<Undefined>(Cmd->Sym, nullptr /*File*/, Cmd->Name,
> + STB_GLOBAL, STV_DEFAULT, STT_NOTYPE);
> +
> + if (auto *U = dyn_cast<Undefined>(Cmd->Sym))
> + U->DefinedInLinkerScript = true;
> + }
> +}
> +
> // This function is called from assignAddresses, while we are
> // fixing the output section addresses. This function is supposed
> // to set the final value for a given symbol assignment.
> @@ -165,13 +199,14 @@
> if (!Cmd->Sym)
> return;
>
> + Defined *Sym = cast<Defined>(Cmd->Sym);
> ExprValue V = Cmd->Expression();
> if (V.isAbsolute()) {
> - Cmd->Sym->Section = nullptr;
> - Cmd->Sym->Value = V.getValue();
> + Sym->Section = nullptr;
> + Sym->Value = V.getValue();
> } else {
> - Cmd->Sym->Section = V.Sec;
> - Cmd->Sym->Value = V.getSectionOffset();
> + Sym->Section = V.Sec;
> + Sym->Value = V.getSectionOffset();
> }
> }
>
> @@ -364,7 +399,8 @@
> for (BaseCommand *Base : SectionCommands) {
> // Handle symbol assignments outside of any output section.
> if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) {
> - addSymbol(Cmd);
> + if (shouldDefineSym(Cmd))
> + addSymbol(Cmd);
> continue;
> }
>
> @@ -396,7 +432,8 @@
> // ".foo : { ...; bar = .; }". Handle them.
> for (BaseCommand *Base : Sec->SectionCommands)
> if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base))
> - addSymbol(OutCmd);
> + if (shouldDefineSym(OutCmd))
> + addSymbol(OutCmd);
>
> // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
> // is given, input sections are aligned to that value, whether the
> Index: ELF/LTO.cpp
> ===================================================================
> --- ELF/LTO.cpp
> +++ ELF/LTO.cpp
> @@ -129,11 +129,6 @@
> std::vector<Symbol *> Syms = F.getSymbols();
> std::vector<lto::SymbolResolution> Resols(Syms.size());
>
> - DenseSet<StringRef> ScriptSymbols;
> - for (BaseCommand *Base : Script->SectionCommands)
> - if (auto *Cmd = dyn_cast<SymbolAssignment>(Base))
> - ScriptSymbols.insert(Cmd->Name);
> -
> // Provide a resolution to the LTO API for each symbol.
> for (const lto::InputFile::Symbol &ObjSym : Obj.symbols()) {
> Symbol *Sym = Syms[SymNum];
> @@ -164,7 +159,7 @@
> // still not final:
> // 1) Aliased (with --defsym) or wrapped (with --wrap) symbols.
> // 2) Symbols redefined in linker script.
> - R.LinkerRedefined = !Sym->CanInline || ScriptSymbols.count(Sym->getName());
> + R.LinkerRedefined = !Sym->CanInline;
> }
> checkError(LTOObj->add(std::move(F.Obj), Resols));
> }
> Index: ELF/Driver.cpp
> ===================================================================
> --- ELF/Driver.cpp
> +++ ELF/Driver.cpp
> @@ -1029,6 +1029,9 @@
> for (StringRef Sym : Script->ReferencedSymbols)
> Symtab->addUndefined<ELFT>(Sym);
>
> + // We want to declare linker script's symbols early, so that can version them.
> + Script->declareSymbols();
> +
> // Handle the `--undefined <sym>` options.
> for (StringRef S : Config->Undefined)
> Symtab->fetchIfLazy<ELFT>(S);
More information about the llvm-commits
mailing list