[llvm] r309009 - [LTO] Prevent dead stripping and internalization of symbols with sections

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 25 12:42:32 PDT 2017


Author: tejohnson
Date: Tue Jul 25 12:42:32 2017
New Revision: 309009

URL: http://llvm.org/viewvc/llvm-project?rev=309009&view=rev
Log:
[LTO] Prevent dead stripping and internalization of symbols with sections

Summary:
ELF linkers generate __start_<secname> and __stop_<secname> symbols
when there is a value in a section <secname> where the name is a valid
C identifier.  If dead stripping determines that the values declared
in section <secname> are dead, and we then internalize (and delete)
such a symbol, programs that reference the corresponding start and end
section symbols will get undefined reference linking errors.

To fix this, add the section name to the IRSymtab entry when a symbol is
defined in a specific section. Then use this in the gold-plugin to mark
the symbol as external and visible from outside the summary when the
section name is a valid C identifier.

Reviewers: pcc

Subscribers: mehdi_amini, inglorion, eraman, llvm-commits

Differential Revision: https://reviews.llvm.org/D35639

Added:
    llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll
    llvm/trunk/test/tools/gold/X86/global_with_section.ll
Modified:
    llvm/trunk/include/llvm/LTO/LTO.h
    llvm/trunk/include/llvm/Object/IRSymtab.h
    llvm/trunk/lib/Object/IRSymtab.cpp
    llvm/trunk/test/Object/X86/irsymtab.ll
    llvm/trunk/tools/gold/gold-plugin.cpp
    llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp

Modified: llvm/trunk/include/llvm/LTO/LTO.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LTO/LTO.h?rev=309009&r1=309008&r2=309009&view=diff
==============================================================================
--- llvm/trunk/include/llvm/LTO/LTO.h (original)
+++ llvm/trunk/include/llvm/LTO/LTO.h Tue Jul 25 12:42:32 2017
@@ -126,6 +126,7 @@ public:
     using irsymtab::Symbol::getCommonSize;
     using irsymtab::Symbol::getCommonAlignment;
     using irsymtab::Symbol::getCOFFWeakExternalFallback;
+    using irsymtab::Symbol::getSectionName;
     using irsymtab::Symbol::isExecutable;
   };
 

Modified: llvm/trunk/include/llvm/Object/IRSymtab.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Object/IRSymtab.h?rev=309009&r1=309008&r2=309009&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Object/IRSymtab.h (original)
+++ llvm/trunk/include/llvm/Object/IRSymtab.h Tue Jul 25 12:42:32 2017
@@ -121,6 +121,9 @@ struct Uncommon {
   /// COFF-specific: the name of the symbol that a weak external resolves to
   /// if not defined.
   Str COFFWeakExternFallbackName;
+
+  /// Specified section name, if any.
+  Str SectionName;
 };
 
 struct Header {
@@ -128,7 +131,7 @@ struct Header {
   /// when the format changes, but it does not need to be incremented if a
   /// change to LLVM would cause it to create a different symbol table.
   Word Version;
-  enum { kCurrentVersion = 0 };
+  enum { kCurrentVersion = 1 };
 
   /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
   /// Consumers should rebuild the symbol table from IR if the producer's
@@ -165,6 +168,7 @@ struct Symbol {
   // Copied from storage::Uncommon.
   uint32_t CommonSize, CommonAlign;
   StringRef COFFWeakExternFallbackName;
+  StringRef SectionName;
 
   /// Returns the mangled symbol name.
   StringRef getName() const { return Name; }
@@ -215,6 +219,8 @@ struct Symbol {
     assert(isWeak() && isIndirect());
     return COFFWeakExternFallbackName;
   }
+
+  StringRef getSectionName() const { return SectionName; }
 };
 
 /// This class can be used to read a Symtab and Strtab produced by
@@ -300,7 +306,10 @@ class Reader::SymbolRef : public Symbol
       CommonSize = UncI->CommonSize;
       CommonAlign = UncI->CommonAlign;
       COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName);
-    }
+      SectionName = R->str(UncI->SectionName);
+    } else
+      // Reset this field so it can be queried unconditionally for all symbols.
+      SectionName = "";
   }
 
 public:

Modified: llvm/trunk/lib/Object/IRSymtab.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/IRSymtab.cpp?rev=309009&r1=309008&r2=309009&view=diff
==============================================================================
--- llvm/trunk/lib/Object/IRSymtab.cpp (original)
+++ llvm/trunk/lib/Object/IRSymtab.cpp Tue Jul 25 12:42:32 2017
@@ -156,6 +156,7 @@ Error Builder::addSymbol(const ModuleSym
     Unc = &Uncommons.back();
     *Unc = {};
     setStr(Unc->COFFWeakExternFallbackName, "");
+    setStr(Unc->SectionName, "");
     return *Unc;
   };
 
@@ -240,6 +241,9 @@ Error Builder::addSymbol(const ModuleSym
     }
   }
 
+  if (!Base->getSection().empty())
+    setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
+
   return Error::success();
 }
 

Modified: llvm/trunk/test/Object/X86/irsymtab.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Object/X86/irsymtab.ll?rev=309009&r1=309008&r2=309009&view=diff
==============================================================================
--- llvm/trunk/test/Object/X86/irsymtab.ll (original)
+++ llvm/trunk/test/Object/X86/irsymtab.ll Tue Jul 25 12:42:32 2017
@@ -9,13 +9,13 @@
 
 ; BCA:      <SYMTAB_BLOCK
 ; Version stored at offset 0.
-; BCA-NEXT:   <BLOB abbrevid=4/> blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
+; BCA-NEXT:   <BLOB abbrevid=4/> blob data = '\x01\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00'
 ; BCA-NEXT: </SYMTAB_BLOCK>
 ; BCA-NEXT: <STRTAB_BLOCK
 ; BCA-NEXT:   <BLOB abbrevid=4/> blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll'
 ; BCA-NEXT: </STRTAB_BLOCK>
 
-; SYMTAB:      version: 0
+; SYMTAB:      version: 1
 ; SYMTAB-NEXT: producer: producer
 ; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu
 ; SYMTAB-NEXT: source filename: irsymtab.ll

Added: llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll?rev=309009&view=auto
==============================================================================
--- llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll (added)
+++ llvm/trunk/test/tools/gold/X86/Inputs/global_with_section.ll Tue Jul 25 12:42:32 2017
@@ -0,0 +1,10 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @deadfunc2_called_from_section() {
+  ret void
+}
+
+define void @deadfunc2_called_from_nonC_section() {
+  ret void
+}

Added: llvm/trunk/test/tools/gold/X86/global_with_section.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/gold/X86/global_with_section.ll?rev=309009&view=auto
==============================================================================
--- llvm/trunk/test/tools/gold/X86/global_with_section.ll (added)
+++ llvm/trunk/test/tools/gold/X86/global_with_section.ll Tue Jul 25 12:42:32 2017
@@ -0,0 +1,79 @@
+; Test to ensure we don't internalize or treat as dead a global value
+; with a valid C identifier section name. Otherwise, ELF linker generation of
+; __start_"sectionname" and __stop_"sectionname" symbols would not occur and
+; we can end up with undefined references at link time.
+
+; First try RegularLTO
+; RUN: opt %s -o %t.o
+; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB
+; RUN: opt %p/Inputs/global_with_section.ll -o %t2.o
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:     --plugin-opt=save-temps \
+; RUN:     -o %t3.o %t.o %t2.o
+; Check results of internalization
+; RUN: llvm-dis %t3.o.0.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK2-REGULARLTO
+
+; Next try ThinLTO
+; RUN: opt -module-summary %s -o %t.o
+; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB
+; RUN: opt -module-summary %p/Inputs/global_with_section.ll -o %t2.o
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:     --plugin-opt=thinlto \
+; RUN:     --plugin-opt=save-temps \
+; RUN:     -o %t3.o %t.o %t2.o
+; Check results of internalization
+; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s
+; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO
+
+; SYMTAB: deadfunc_with_section
+; SYMTAB-NEXT: section some_other_section
+; SYMTAB-NEXT: deadfunc_with_nonC_section
+; SYMTAB-NEXT: section .nonCsection
+; SYMTAB-NEXT: deadfunc2_called_from_section
+; SYMTAB-NEXT: deadfunc2_called_from_nonC_section
+; SYMTAB-NEXT: var_with_section
+; SYMTAB-NEXT: section some_section
+; SYMTAB-NEXT: var_with_nonC_section
+; SYMTAB-NEXT: section .nonCsection
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; We should not internalize @var_with_section due to section
+; CHECK-DAG: @var_with_section = global i32 0, section "some_section"
+ at var_with_section = global i32 0, section "some_section"
+
+; Confirm via a variable with a non-C identifier section that we are getting
+; the expected internalization.
+; CHECK-DAG: @var_with_nonC_section = internal global i32 0, section ".nonCsection"
+ at var_with_nonC_section = global i32 0, section ".nonCsection"
+
+; We should not internalize @deadfunc_with_section due to section
+; CHECK-DAG: define void @deadfunc_with_section() section "some_other_section"
+define void @deadfunc_with_section() section "some_other_section" {
+  call void @deadfunc2_called_from_section()
+  ret void
+}
+
+; Confirm via a function with a non-C identifier section that we are getting
+; the expected internalization.
+; CHECK-DAG: define internal void @deadfunc_with_nonC_section() section ".nonCsection"
+define void @deadfunc_with_nonC_section() section ".nonCsection" {
+  call void @deadfunc2_called_from_nonC_section()
+  ret void
+}
+
+; In RegularLTO mode, where we have combined all the IR,
+; @deadfunc2_called_from_section can be internalized.
+; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_section
+; In ThinLTO mode, we can't internalize it as it needs to be preserved
+; (due to the access from @deadfunc_with_section which must be preserved), and
+; can't be internalized since the reference is from a different module.
+; CHECK2-THINLTO: define void @deadfunc2_called_from_section
+declare void @deadfunc2_called_from_section()
+
+; Confirm when called from a function with a non-C identifier section that we
+; are getting the expected internalization.
+; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_nonC_section
+; CHECK2-THINLTO: define internal void @deadfunc2_called_from_nonC_section
+declare void @deadfunc2_called_from_nonC_section()

Modified: llvm/trunk/tools/gold/gold-plugin.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/gold/gold-plugin.cpp?rev=309009&r1=309008&r2=309009&view=diff
==============================================================================
--- llvm/trunk/tools/gold/gold-plugin.cpp (original)
+++ llvm/trunk/tools/gold/gold-plugin.cpp Tue Jul 25 12:42:32 2017
@@ -605,6 +605,18 @@ static std::string getThinLTOObjectFileN
   return NewPath.str() + NewSuffix;
 }
 
+static bool isAlpha(char C) {
+  return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_';
+}
+
+static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); }
+
+// Returns true if S is valid as a C language identifier.
+static bool isValidCIdentifier(StringRef S) {
+  return !S.empty() && isAlpha(S[0]) &&
+         std::all_of(S.begin() + 1, S.end(), isAlnum);
+}
+
 static void addModule(LTO &Lto, claimed_file &F, const void *View,
                       StringRef Filename) {
   MemoryBufferRef BufferRef(StringRef((const char *)View, F.filesize),
@@ -616,8 +628,12 @@ static void addModule(LTO &Lto, claimed_
             toString(ObjOrErr.takeError()).c_str());
 
   unsigned SymNum = 0;
+  std::unique_ptr<InputFile> Input = std::move(ObjOrErr.get());
+  auto InputFileSyms = Input->symbols();
+  assert(InputFileSyms.size() == F.syms.size());
   std::vector<SymbolResolution> Resols(F.syms.size());
   for (ld_plugin_symbol &Sym : F.syms) {
+    const InputFile::Symbol &InpSym = InputFileSyms[SymNum];
     SymbolResolution &R = Resols[SymNum++];
 
     ld_plugin_symbol_resolution Resolution =
@@ -653,6 +669,13 @@ static void addModule(LTO &Lto, claimed_
       break;
     }
 
+    // If the symbol has a C identifier section name, we need to mark
+    // it as visible to a regular object so that LTO will keep it around
+    // to ensure the linker generates special __start_<secname> and
+    // __stop_<secname> symbols which may be used elsewhere.
+    if (isValidCIdentifier(InpSym.getSectionName()))
+      R.VisibleToRegularObj = true;
+
     if (Resolution != LDPR_RESOLVED_DYN && Resolution != LDPR_UNDEF &&
         (IsExecutable || !Res.DefaultVisibility))
       R.FinalDefinitionInLinkageUnit = true;
@@ -660,7 +683,7 @@ static void addModule(LTO &Lto, claimed_
     freeSymName(Sym);
   }
 
-  check(Lto.add(std::move(*ObjOrErr), Resols),
+  check(Lto.add(std::move(Input), Resols),
         std::string("Failed to link module ") + F.name);
 }
 

Modified: llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp?rev=309009&r1=309008&r2=309009&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp (original)
+++ llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp Tue Jul 25 12:42:32 2017
@@ -355,6 +355,9 @@ static int dumpSymtab(int argc, char **a
 
       if (TT.isOSBinFormatCOFF() && Sym.isWeak() && Sym.isIndirect())
         outs() << "         fallback " << Sym.getCOFFWeakExternalFallback() << '\n';
+
+      if (!Sym.getSectionName().empty())
+        outs() << "         section " << Sym.getSectionName() << "\n";
     }
 
     outs() << '\n';




More information about the llvm-commits mailing list