[llvm] r309009 - [LTO] Prevent dead stripping and internalization of symbols with sections

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 25 14:56:37 PDT 2017


In lld we have a precise handling of __start/__stop symbols. If you want
I can try to implement the corresponding patch in lld.

Cheers,
Rafael

Teresa Johnson via llvm-commits <llvm-commits at lists.llvm.org> writes:

> Author: tejohnson
> Date: Tue Jul 25 12:42:32 2017
> New Revision: 309009
>
> URL: http://llvm.org/viewvc/llvm-project?rev=309009&view=rev
> Log:
> [LTO] Prevent dead stripping and internalization of symbols with sections
>
> Summary:
> ELF linkers generate __start_<secname> and __stop_<secname> symbols
> when there is a value in a section <secname> where the name is a valid
> C identifier.  If dead stripping determines that the values declared
> in section <secname> are dead, and we then internalize (and delete)
> such a symbol, programs that reference the corresponding start and end
> section symbols will get undefined reference linking errors.
>
> To fix this, add the section name to the IRSymtab entry when a symbol is
> defined in a specific section. Then use this in the gold-plugin to mark
> the symbol as external and visible from outside the summary when the
> section name is a valid C identifier.
>
> Reviewers: pcc
>
> Subscribers: mehdi_amini, inglorion, eraman, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D35639
>
> Added:
>     llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll
>     llvm/trunk/test/tools/gold/X86/global_with_section.ll
> Modified:
>     llvm/trunk/include/llvm/LTO/LTO.h
>     llvm/trunk/include/llvm/Object/IRSymtab.h
>     llvm/trunk/lib/Object/IRSymtab.cpp
>     llvm/trunk/test/Object/X86/irsymtab.ll
>     llvm/trunk/tools/gold/gold-plugin.cpp
>     llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
>
> Modified: llvm/trunk/include/llvm/LTO/LTO.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LTO/LTO.h?rev=309009&r1=309008&r2=309009&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/LTO/LTO.h (original)
> +++ llvm/trunk/include/llvm/LTO/LTO.h Tue Jul 25 12:42:32 2017
> @@ -126,6 +126,7 @@ public:
>      using irsymtab::Symbol::getCommonSize;
>      using irsymtab::Symbol::getCommonAlignment;
>      using irsymtab::Symbol::getCOFFWeakExternalFallback;
> +    using irsymtab::Symbol::getSectionName;
>      using irsymtab::Symbol::isExecutable;
>    };
>  
>
> Modified: llvm/trunk/include/llvm/Object/IRSymtab.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/IRSymtab.h?rev=309009&r1=309008&r2=309009&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Object/IRSymtab.h (original)
> +++ llvm/trunk/include/llvm/Object/IRSymtab.h Tue Jul 25 12:42:32 2017
> @@ -121,6 +121,9 @@ struct Uncommon {
>    /// COFF-specific: the name of the symbol that a weak external resolves to
>    /// if not defined.
>    Str COFFWeakExternFallbackName;
> +
> +  /// Specified section name, if any.
> +  Str SectionName;
>  };
>  
>  struct Header {
> @@ -128,7 +131,7 @@ struct Header {
>    /// when the format changes, but it does not need to be incremented if a
>    /// change to LLVM would cause it to create a different symbol table.
>    Word Version;
> -  enum { kCurrentVersion = 0 };
> +  enum { kCurrentVersion = 1 };
>  
>    /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
>    /// Consumers should rebuild the symbol table from IR if the producer's
> @@ -165,6 +168,7 @@ struct Symbol {
>    // Copied from storage::Uncommon.
>    uint32_t CommonSize, CommonAlign;
>    StringRef COFFWeakExternFallbackName;
> +  StringRef SectionName;
>  
>    /// Returns the mangled symbol name.
>    StringRef getName() const { return Name; }
> @@ -215,6 +219,8 @@ struct Symbol {
>      assert(isWeak() && isIndirect());
>      return COFFWeakExternFallbackName;
>    }
> +
> +  StringRef getSectionName() const { return SectionName; }
>  };
>  
>  /// This class can be used to read a Symtab and Strtab produced by
> @@ -300,7 +306,10 @@ class Reader::SymbolRef : public Symbol
>        CommonSize = UncI->CommonSize;
>        CommonAlign = UncI->CommonAlign;
>        COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName);
> -    }
> +      SectionName = R->str(UncI->SectionName);
> +    } else
> +      // Reset this field so it can be queried unconditionally for all symbols.
> +      SectionName = "";
>    }
>  
>  public:
>
> Modified: llvm/trunk/lib/Object/IRSymtab.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/IRSymtab.cpp?rev=309009&r1=309008&r2=309009&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Object/IRSymtab.cpp (original)
> +++ llvm/trunk/lib/Object/IRSymtab.cpp Tue Jul 25 12:42:32 2017
> @@ -156,6 +156,7 @@ Error Builder::addSymbol(const ModuleSym
>      Unc = &Uncommons.back();
>      *Unc = {};
>      setStr(Unc->COFFWeakExternFallbackName, "");
> +    setStr(Unc->SectionName, "");
>      return *Unc;
>    };
>  
> @@ -240,6 +241,9 @@ Error Builder::addSymbol(const ModuleSym
>      }
>    }
>  
> +  if (!Base->getSection().empty())
> +    setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
> +
>    return Error::success();
>  }
>  
>
> Modified: llvm/trunk/test/Object/X86/irsymtab.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/X86/irsymtab.ll?rev=309009&r1=309008&r2=309009&view=diff
> ==============================================================================
> --- llvm/trunk/test/Object/X86/irsymtab.ll (original)
> +++ llvm/trunk/test/Object/X86/irsymtab.ll Tue Jul 25 12:42:32 2017
> @@ -9,13 +9,13 @@
>  
>  ; BCA:      <SYMTAB_BLOCK
>  ; Version stored at offset 0.
> -; BCA-NEXT:   <BLOB abbrevid=4/> blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
> +; BCA-NEXT:   <BLOB abbrevid=4/> blob data = '\x01\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
>  ; BCA-NEXT: </SYMTAB_BLOCK>
>  ; BCA-NEXT: <STRTAB_BLOCK
>  ; BCA-NEXT:   <BLOB abbrevid=4/> blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll'
>  ; BCA-NEXT: </STRTAB_BLOCK>
>  
> -; SYMTAB:      version: 0
> +; SYMTAB:      version: 1
>  ; SYMTAB-NEXT: producer: producer
>  ; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu
>  ; SYMTAB-NEXT: source filename: irsymtab.ll
>
> Added: llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll?rev=309009&view=auto
> ==============================================================================
> --- llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll (added)
> +++ llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll Tue Jul 25 12:42:32 2017
> @@ -0,0 +1,10 @@
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +define void @deadfunc2_called_from_section() {
> +  ret void
> +}
> +
> +define void @deadfunc2_called_from_nonC_section() {
> +  ret void
> +}
>
> Added: llvm/trunk/test/tools/gold/X86/global_with_section.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/gold/X86/global_with_section.ll?rev=309009&view=auto
> ==============================================================================
> --- llvm/trunk/test/tools/gold/X86/global_with_section.ll (added)
> +++ llvm/trunk/test/tools/gold/X86/global_with_section.ll Tue Jul 25 12:42:32 2017
> @@ -0,0 +1,79 @@
> +; Test to ensure we don't internalize or treat as dead a global value
> +; with a valid C identifier section name. Otherwise, ELF linker generation of
> +; __start_"sectionname" and __stop_"sectionname" symbols would not occur and
> +; we can end up with undefined references at link time.
> +
> +; First try RegularLTO
> +; RUN: opt %s -o %t.o
> +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB
> +; RUN: opt %p/Inputs/global_with_section.ll -o %t2.o
> +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
> +; RUN:     --plugin-opt=save-temps \
> +; RUN:     -o %t3.o %t.o %t2.o
> +; Check results of internalization
> +; RUN: llvm-dis %t3.o.0.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK2-REGULARLTO
> +
> +; Next try ThinLTO
> +; RUN: opt -module-summary %s -o %t.o
> +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB
> +; RUN: opt -module-summary %p/Inputs/global_with_section.ll -o %t2.o
> +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
> +; RUN:     --plugin-opt=thinlto \
> +; RUN:     --plugin-opt=save-temps \
> +; RUN:     -o %t3.o %t.o %t2.o
> +; Check results of internalization
> +; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s
> +; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO
> +
> +; SYMTAB: deadfunc_with_section
> +; SYMTAB-NEXT: section some_other_section
> +; SYMTAB-NEXT: deadfunc_with_nonC_section
> +; SYMTAB-NEXT: section .nonCsection
> +; SYMTAB-NEXT: deadfunc2_called_from_section
> +; SYMTAB-NEXT: deadfunc2_called_from_nonC_section
> +; SYMTAB-NEXT: var_with_section
> +; SYMTAB-NEXT: section some_section
> +; SYMTAB-NEXT: var_with_nonC_section
> +; SYMTAB-NEXT: section .nonCsection
> +
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +; We should not internalize @var_with_section due to section
> +; CHECK-DAG: @var_with_section = global i32 0, section "some_section"
> + at var_with_section = global i32 0, section "some_section"
> +
> +; Confirm via a variable with a non-C identifier section that we are getting
> +; the expected internalization.
> +; CHECK-DAG: @var_with_nonC_section = internal global i32 0, section ".nonCsection"
> + at var_with_nonC_section = global i32 0, section ".nonCsection"
> +
> +; We should not internalize @deadfunc_with_section due to section
> +; CHECK-DAG: define void @deadfunc_with_section() section "some_other_section"
> +define void @deadfunc_with_section() section "some_other_section" {
> +  call void @deadfunc2_called_from_section()
> +  ret void
> +}
> +
> +; Confirm via a function with a non-C identifier section that we are getting
> +; the expected internalization.
> +; CHECK-DAG: define internal void @deadfunc_with_nonC_section() section ".nonCsection"
> +define void @deadfunc_with_nonC_section() section ".nonCsection" {
> +  call void @deadfunc2_called_from_nonC_section()
> +  ret void
> +}
> +
> +; In RegularLTO mode, where we have combined all the IR,
> +; @deadfunc2_called_from_section can be internalized.
> +; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_section
> +; In ThinLTO mode, we can't internalize it as it needs to be preserved
> +; (due to the access from @deadfunc_with_section which must be preserved), and
> +; can't be internalized since the reference is from a different module.
> +; CHECK2-THINLTO: define void @deadfunc2_called_from_section
> +declare void @deadfunc2_called_from_section()
> +
> +; Confirm when called from a function with a non-C identifier section that we
> +; are getting the expected internalization.
> +; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_nonC_section
> +; CHECK2-THINLTO: define internal void @deadfunc2_called_from_nonC_section
> +declare void @deadfunc2_called_from_nonC_section()
>
> Modified: llvm/trunk/tools/gold/gold-plugin.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/gold/gold-plugin.cpp?rev=309009&r1=309008&r2=309009&view=diff
> ==============================================================================
> --- llvm/trunk/tools/gold/gold-plugin.cpp (original)
> +++ llvm/trunk/tools/gold/gold-plugin.cpp Tue Jul 25 12:42:32 2017
> @@ -605,6 +605,18 @@ static std::string getThinLTOObjectFileN
>    return NewPath.str() + NewSuffix;
>  }
>  
> +static bool isAlpha(char C) {
> +  return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_';
> +}
> +
> +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); }
> +
> +// Returns true if S is valid as a C language identifier.
> +static bool isValidCIdentifier(StringRef S) {
> +  return !S.empty() && isAlpha(S[0]) &&
> +         std::all_of(S.begin() + 1, S.end(), isAlnum);
> +}
> +
>  static void addModule(LTO &Lto, claimed_file &F, const void *View,
>                        StringRef Filename) {
>    MemoryBufferRef BufferRef(StringRef((const char *)View, F.filesize),
> @@ -616,8 +628,12 @@ static void addModule(LTO &Lto, claimed_
>              toString(ObjOrErr.takeError()).c_str());
>  
>    unsigned SymNum = 0;
> +  std::unique_ptr<InputFile> Input = std::move(ObjOrErr.get());
> +  auto InputFileSyms = Input->symbols();
> +  assert(InputFileSyms.size() == F.syms.size());
>    std::vector<SymbolResolution> Resols(F.syms.size());
>    for (ld_plugin_symbol &Sym : F.syms) {
> +    const InputFile::Symbol &InpSym = InputFileSyms[SymNum];
>      SymbolResolution &R = Resols[SymNum++];
>  
>      ld_plugin_symbol_resolution Resolution =
> @@ -653,6 +669,13 @@ static void addModule(LTO &Lto, claimed_
>        break;
>      }
>  
> +    // If the symbol has a C identifier section name, we need to mark
> +    // it as visible to a regular object so that LTO will keep it around
> +    // to ensure the linker generates special __start_<secname> and
> +    // __stop_<secname> symbols which may be used elsewhere.
> +    if (isValidCIdentifier(InpSym.getSectionName()))
> +      R.VisibleToRegularObj = true;
> +
>      if (Resolution != LDPR_RESOLVED_DYN && Resolution != LDPR_UNDEF &&
>          (IsExecutable || !Res.DefaultVisibility))
>        R.FinalDefinitionInLinkageUnit = true;
> @@ -660,7 +683,7 @@ static void addModule(LTO &Lto, claimed_
>      freeSymName(Sym);
>    }
>  
> -  check(Lto.add(std::move(*ObjOrErr), Resols),
> +  check(Lto.add(std::move(Input), Resols),
>          std::string("Failed to link module ") + F.name);
>  }
>  
>
> Modified: llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp?rev=309009&r1=309008&r2=309009&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp (original)
> +++ llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp Tue Jul 25 12:42:32 2017
> @@ -355,6 +355,9 @@ static int dumpSymtab(int argc, char **a
>  
>        if (TT.isOSBinFormatCOFF() && Sym.isWeak() && Sym.isIndirect())
>          outs() << "         fallback " << Sym.getCOFFWeakExternalFallback() << '\n';
> +
> +      if (!Sym.getSectionName().empty())
> +        outs() << "         section " << Sym.getSectionName() << "\n";
>      }
>  
>      outs() << '\n';
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list