[llvm] r298158 - [asan] Fix dead stripping of globals on Linux.

Evgeniy Stepanov via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 17 15:17:30 PDT 2017


Author: eugenis
Date: Fri Mar 17 17:17:29 2017
New Revision: 298158

URL: http://llvm.org/viewvc/llvm-project?rev=298158&view=rev
Log:
[asan] Fix dead stripping of globals on Linux.

Use a combination of !associated, comdat, @llvm.compiler.used and
custom sections to allow dead stripping of globals and their asan
metadata. Sometimes.

Currently this works on LLD, which supports SHF_LINK_ORDER with
sh_link pointing to the associated section.

This also works on BFD, which seems to treat comdats as
all-or-nothing with respect to linker GC. There is a weird quirk
where the "first" global in each link is never GC-ed because of the
section symbols.

At this moment it does not work on Gold (as in the globals are never
stripped).

Differential Revision: https://reviews.llvm.org/D30121

Modified:
    llvm/trunk/include/llvm/Transforms/Utils/ModuleUtils.h
    llvm/trunk/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
    llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
    llvm/trunk/lib/Transforms/Utils/ModuleUtils.cpp
    llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll
    llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll
    llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll

Modified: llvm/trunk/include/llvm/Transforms/Utils/ModuleUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/ModuleUtils.h?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/ModuleUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/ModuleUtils.h Fri Mar 17 17:17:29 2017
@@ -81,6 +81,17 @@ void appendToCompilerUsed(Module &M, Arr
 void filterDeadComdatFunctions(
     Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions);
 
+/// \brief Produce a unique identifier for this module by taking the MD5 sum of
+/// the names of the module's strong external symbols.
+///
+/// This identifier is normally guaranteed to be unique, or the program would
+/// fail to link due to multiply defined symbols.
+///
+/// If the module has no strong external symbols (such a module may still have a
+/// semantic effect if it performs global initialization), we cannot produce a
+/// unique identifier for this module, so we return the empty string.
+std::string getUniqueModuleId(Module *M);
+
 } // End llvm namespace
 
 #endif //  LLVM_TRANSFORMS_UTILS_MODULEUTILS_H

Modified: llvm/trunk/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp Fri Mar 17 17:17:29 2017
@@ -28,51 +28,11 @@
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 using namespace llvm;
 
 namespace {
 
-// Produce a unique identifier for this module by taking the MD5 sum of the
-// names of the module's strong external symbols. This identifier is
-// normally guaranteed to be unique, or the program would fail to link due to
-// multiply defined symbols.
-//
-// If the module has no strong external symbols (such a module may still have a
-// semantic effect if it performs global initialization), we cannot produce a
-// unique identifier for this module, so we return the empty string, which
-// causes the entire module to be written as a regular LTO module.
-std::string getModuleId(Module *M) {
-  MD5 Md5;
-  bool ExportsSymbols = false;
-  auto AddGlobal = [&](GlobalValue &GV) {
-    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
-        !GV.hasExternalLinkage())
-      return;
-    ExportsSymbols = true;
-    Md5.update(GV.getName());
-    Md5.update(ArrayRef<uint8_t>{0});
-  };
-
-  for (auto &F : *M)
-    AddGlobal(F);
-  for (auto &GV : M->globals())
-    AddGlobal(GV);
-  for (auto &GA : M->aliases())
-    AddGlobal(GA);
-  for (auto &IF : M->ifuncs())
-    AddGlobal(IF);
-
-  if (!ExportsSymbols)
-    return "";
-
-  MD5::MD5Result R;
-  Md5.final(R);
-
-  SmallString<32> Str;
-  MD5::stringifyResult(R, Str);
-  return ("$" + Str).str();
-}
-
 // Promote each local-linkage entity defined by ExportM and used by ImportM by
 // changing visibility and appending the given ModuleId.
 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
@@ -253,7 +213,7 @@ void forEachVirtualFunction(Constant *C,
 void splitAndWriteThinLTOBitcode(
     raw_ostream &OS, function_ref<AAResults &(Function &)> AARGetter,
     Module &M) {
-  std::string ModuleId = getModuleId(&M);
+  std::string ModuleId = getUniqueModuleId(&M);
   if (ModuleId.empty()) {
     // We couldn't generate a module ID for this module, just write it out as a
     // regular LTO module.

Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Fri Mar 17 17:17:29 2017
@@ -101,6 +101,10 @@ static const char *const kAsanRegisterIm
   "__asan_register_image_globals";
 static const char *const kAsanUnregisterImageGlobalsName =
   "__asan_unregister_image_globals";
+static const char *const kAsanRegisterElfGlobalsName =
+  "__asan_register_elf_globals";
+static const char *const kAsanUnregisterElfGlobalsName =
+  "__asan_unregister_elf_globals";
 static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
 static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
 static const char *const kAsanInitName = "__asan_init";
@@ -120,8 +124,11 @@ static const char *const kAsanPoisonStac
     "__asan_poison_stack_memory";
 static const char *const kAsanUnpoisonStackMemoryName =
     "__asan_unpoison_stack_memory";
+
+// ASan version script has __asan_* wildcard. Triple underscore prevents a
+// linker (gold) warning about attempting to export a local symbol.
 static const char *const kAsanGlobalsRegisteredFlagName =
-    "__asan_globals_registered";
+    "___asan_globals_registered";
 
 static const char *const kAsanOptionDetectUseAfterReturn =
     "__asan_option_detect_stack_use_after_return";
@@ -612,6 +619,10 @@ private:
   void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
                              ArrayRef<GlobalVariable *> ExtendedGlobals,
                              ArrayRef<Constant *> MetadataInitializers);
+  void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M,
+                            ArrayRef<GlobalVariable *> ExtendedGlobals,
+                            ArrayRef<Constant *> MetadataInitializers,
+                            const std::string &UniqueModuleId);
   void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
                               ArrayRef<GlobalVariable *> ExtendedGlobals,
                               ArrayRef<Constant *> MetadataInitializers);
@@ -622,7 +633,8 @@ private:
 
   GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
                                        StringRef OriginalName);
-  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata);
+  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata,
+                                  StringRef InternalSuffix);
   IRBuilder<> CreateAsanModuleDtor(Module &M);
 
   bool ShouldInstrumentGlobal(GlobalVariable *G);
@@ -647,6 +659,8 @@ private:
   Function *AsanUnregisterGlobals;
   Function *AsanRegisterImageGlobals;
   Function *AsanUnregisterImageGlobals;
+  Function *AsanRegisterElfGlobals;
+  Function *AsanUnregisterElfGlobals;
 };
 
 // Stack poisoning does not play well with exception handling.
@@ -1596,12 +1610,22 @@ void AddressSanitizerModule::initializeC
       checkSanitizerInterfaceFunction(M.getOrInsertFunction(
           kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
   AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
+
+  AsanRegisterElfGlobals = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
+                            IntptrTy, IntptrTy, IntptrTy, nullptr));
+  AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage);
+
+  AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
+                            IntptrTy, IntptrTy, IntptrTy, nullptr));
+  AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);
 }
 
 // Put the metadata and the instrumented global in the same group. This ensures
 // that the metadata is discarded if the instrumented global is discarded.
 void AddressSanitizerModule::SetComdatForGlobalMetadata(
-    GlobalVariable *G, GlobalVariable *Metadata) {
+    GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
   Module &M = *G->getParent();
   Comdat *C = G->getComdat();
   if (!C) {
@@ -1611,7 +1635,15 @@ void AddressSanitizerModule::SetComdatFo
       assert(G->hasLocalLinkage());
       G->setName(Twine(kAsanGenPrefix) + "_anon_global");
     }
-    C = M.getOrInsertComdat(G->getName());
+
+    if (!InternalSuffix.empty() && G->hasLocalLinkage()) {
+      std::string Name = G->getName();
+      Name += InternalSuffix;
+      C = M.getOrInsertComdat(Name);
+    } else {
+      C = M.getOrInsertComdat(G->getName());
+    }
+
     // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
     if (TargetTriple.isOSBinFormatCOFF())
       C->setSelectionKind(Comdat::NoDuplicates);
@@ -1666,8 +1698,67 @@ void AddressSanitizerModule::InstrumentG
            "global metadata will not be padded appropriately");
     Metadata->setAlignment(SizeOfGlobalStruct);
 
-    SetComdatForGlobalMetadata(G, Metadata);
+    SetComdatForGlobalMetadata(G, Metadata, "");
+  }
+}
+
+void AddressSanitizerModule::InstrumentGlobalsELF(
+    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+    ArrayRef<Constant *> MetadataInitializers,
+    const std::string &UniqueModuleId) {
+  assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size());
+  for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+    GlobalVariable *G = ExtendedGlobals[i];
+    GlobalVariable *Metadata =
+        CreateMetadataGlobal(M, MetadataInitializers[i], G->getName());
+    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G));
+    Metadata->setMetadata(LLVMContext::MD_associated, MD);
+    MetadataGlobals[i] = Metadata;
+
+    SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);
   }
+
+  // Update llvm.compiler.used, adding the new metadata globals. This is
+  // needed so that during LTO these variables stay alive.
+  if (!MetadataGlobals.empty())
+    appendToCompilerUsed(M, MetadataGlobals);
+
+  // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+  // to look up the loaded image that contains it. Second, we can store in it
+  // whether registration has already occurred, to prevent duplicate
+  // registration.
+  //
+  // Common linkage ensures that there is only one global per shared library.
+  GlobalVariable *RegisteredFlag = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::CommonLinkage,
+      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+  // Create start and stop symbols.
+  GlobalVariable *StartELFMetadata = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+      "__start_" + getGlobalMetadataSection());
+  StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+  GlobalVariable *StopELFMetadata = new GlobalVariable(
+      M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+      "__stop_" + getGlobalMetadataSection());
+  StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+
+  // Create a call to register the globals with the runtime.
+  IRB.CreateCall(AsanRegisterElfGlobals,
+                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+                  IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+                  IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+
+  // We also need to unregister globals at the end, e.g., when a shared library
+  // gets closed.
+  IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+  IRB_Dtor.CreateCall(AsanUnregisterElfGlobals,
+                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+                       IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+                       IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
 }
 
 void AddressSanitizerModule::InstrumentGlobalsMachO(
@@ -1912,7 +2003,12 @@ bool AddressSanitizerModule::InstrumentG
     Initializers[i] = Initializer;
   }
 
-  if (TargetTriple.isOSBinFormatCOFF()) {
+  std::string ELFUniqueModuleId =
+      TargetTriple.isOSBinFormatELF() ? getUniqueModuleId(&M) : "";
+
+  if (!ELFUniqueModuleId.empty()) {
+    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId);
+  } else if (TargetTriple.isOSBinFormatCOFF()) {
     InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
   } else if (ShouldUseMachOGlobalsSection()) {
     InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);

Modified: llvm/trunk/lib/Transforms/Utils/ModuleUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/ModuleUtils.cpp?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/ModuleUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/ModuleUtils.cpp Fri Mar 17 17:17:29 2017
@@ -229,3 +229,35 @@ void llvm::filterDeadComdatFunctions(
            ComdatEntriesCovered.end();
   });
 }
+
+std::string llvm::getUniqueModuleId(Module *M) {
+  MD5 Md5;
+  bool ExportsSymbols = false;
+  auto AddGlobal = [&](GlobalValue &GV) {
+    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+        !GV.hasExternalLinkage())
+      return;
+    ExportsSymbols = true;
+    Md5.update(GV.getName());
+    Md5.update(ArrayRef<uint8_t>{0});
+  };
+
+  for (auto &F : *M)
+    AddGlobal(F);
+  for (auto &GV : M->globals())
+    AddGlobal(GV);
+  for (auto &GA : M->aliases())
+    AddGlobal(GA);
+  for (auto &IF : M->ifuncs())
+    AddGlobal(IF);
+
+  if (!ExportsSymbols)
+    return "";
+
+  MD5::MD5Result R;
+  Md5.final(R);
+
+  SmallString<32> Str;
+  MD5::stringifyResult(R, Str);
+  return ("$" + Str).str();
+}

Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll (original)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll Fri Mar 17 17:17:29 2017
@@ -12,17 +12,23 @@ target triple = "x86_64-unknown-linux-gn
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_asan_globals.cpp, i8* null }]
 
 ; Check that globals were instrumented:
-; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, align 32
-; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, align 32
+
+; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, comdat, align 32
+; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, comdat($".str${{[01-9a-f]+}}"), align 32
 
 ; Check emitted location descriptions:
 ; CHECK: [[VARNAME:@__asan_gen_.[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1
 ; CHECK: [[FILENAME:@__asan_gen_.[0-9]+]] = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1
 ; CHECK: [[LOCDESCR:@__asan_gen_.[0-9]+]] = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* [[FILENAME]], i32 5, i32 5 }
+; CHECK: @__asan_global_global = {{.*}}i64 ptrtoint ({ i32, [60 x i8] }* @global to i64){{.*}} section "asan_globals"{{.*}}, !associated
+; CHECK: @__asan_global_.str = {{.*}}i64 ptrtoint ({ [14 x i8], [50 x i8] }* @.str to i64){{.*}} section "asan_globals"{{.*}}, !associated
+
+; The metadata has to be inserted to llvm.compiler.used to avoid being stripped
+; during LTO.
+; CHECK: @llvm.compiler.used {{.*}} @__asan_global_global {{.*}} section "llvm.metadata"
 
 ; Check that location descriptors and global names were passed into __asan_register_globals:
-; CHECK: i64 ptrtoint ([7 x i8]* [[VARNAME]] to i64)
-; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* [[LOCDESCR]] to i64)
+; CHECK: call void @__asan_register_elf_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64), i64 ptrtoint (i64* @__start_asan_globals to i64), i64 ptrtoint (i64* @__stop_asan_globals to i64))
 
 ; Function Attrs: nounwind sanitize_address
 define internal void @__cxx_global_var_init() #0 section ".text.startup" {

Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll (original)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll Fri Mar 17 17:17:29 2017
@@ -26,16 +26,16 @@ target triple = "x86_64-apple-macosx10.1
 ; CHECK: @llvm.compiler.used {{.*}} @__asan_binder_global {{.*}} section "llvm.metadata"
 
 ; Test that there is the flag global variable:
-; CHECK: @__asan_globals_registered = common hidden global i64 0
+; CHECK: @___asan_globals_registered = common hidden global i64 0
 
 ; Test that __asan_register_image_globals is invoked from the constructor:
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_register_image_globals(i64 ptrtoint (i64* @__asan_globals_registered to i64))
+; CHECK: call void @__asan_register_image_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64))
 ; CHECK: ret
 
 ; Test that __asan_unregister_image_globals is invoked from the destructor:
 ; CHECK-LABEL: define internal void @asan.module_dtor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_unregister_image_globals(i64 ptrtoint (i64* @__asan_globals_registered to i64))
+; CHECK: call void @__asan_unregister_image_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64))
 ; CHECK: ret

Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll?rev=298158&r1=298157&r2=298158&view=diff
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll (original)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll Fri Mar 17 17:17:29 2017
@@ -73,10 +73,10 @@ entry:
 
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_register_globals
+; CHECK: call void @__asan_register_elf_globals
 ; CHECK: ret
 
 ; CHECK-LABEL: define internal void @asan.module_dtor
 ; CHECK-NOT: ret
-; CHECK: call void @__asan_unregister_globals
+; CHECK: call void @__asan_unregister_elf_globals
 ; CHECK: ret




More information about the llvm-commits mailing list