[llvm-commits] [llvm] r162268 - in /llvm/trunk: lib/Transforms/Instrumentation/AddressSanitizer.cpp test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll

Kostya Serebryany kcc at google.com
Tue Aug 21 01:24:26 PDT 2012


Author: kcc
Date: Tue Aug 21 03:24:25 2012
New Revision: 162268

URL: http://llvm.org/viewvc/llvm-project?rev=162268&view=rev
Log:
[asan] add code to detect global initialization fiasco in C/C++. The sub-pass is off by default for now. Patch by Reid Watson. Note: this patch changes the interface between LLVM and compiler-rt parts of asan. The corresponding patch to compiler-rt will follow.

Added:
    llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
Modified:
    llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=162268&r1=162267&r2=162268&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Tue Aug 21 03:24:25 2012
@@ -61,6 +61,8 @@
 static const char *kAsanReportErrorTemplate = "__asan_report_";
 static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
 static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
 static const char *kAsanInitName = "__asan_init";
 static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
 static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
@@ -106,6 +108,8 @@
 // This flag may need to be replaced with -f[no]asan-globals.
 static cl::opt<bool> ClGlobals("asan-globals",
        cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+       cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
 static cl::opt<bool> ClMemIntrin("asan-memintrin",
        cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
 // This flag may need to be replaced with -fasan-blacklist.
@@ -171,6 +175,8 @@
                                    Instruction *InsertBefore, bool IsWrite);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
   bool handleFunction(Module &M, Function &F);
+  void createInitializerPoisonCalls(Module &M,
+                                    Value *FirstAddr, Value *LastAddr);
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
   bool poisonStackInFunction(Module &M, Function &F);
   virtual bool runOnModule(Module &M);
@@ -178,7 +184,6 @@
   static char ID;  // Pass identification, replacement for typeid
 
  private:
-
   uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
     Type *Ty = AI->getAllocatedType();
     uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@@ -194,9 +199,12 @@
   }
 
   Function *checkInterfaceFunction(Constant *FuncOrBitcast);
+  bool ShouldInstrumentGlobal(GlobalVariable *G);
   void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
                    Value *ShadowBase, bool DoPoison);
   bool LooksLikeCodeInBug11395(Instruction *I);
+  void FindDynamicInitializers(Module &M);
+  bool HasDynamicInitializer(GlobalVariable *G);
 
   LLVMContext *C;
   TargetData *TD;
@@ -213,6 +221,7 @@
   // This array is indexed by AccessIsWrite and log2(AccessSize).
   Function *AsanErrorCallback[2][kNumberOfAccessSizes];
   InlineAsm *EmptyAsm;
+  SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
 };
 
 }  // namespace
@@ -358,14 +367,50 @@
   return NULL;
 }
 
+void AddressSanitizer::FindDynamicInitializers(Module& M) {
+  // Clang generates metadata identifying all dynamically initialized globals.
+  NamedMDNode *DynamicGlobals =
+      M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+  if (!DynamicGlobals)
+    return;
+  for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+    MDNode *MDN = DynamicGlobals->getOperand(i);
+    assert(MDN->getNumOperands() == 1);
+    Value *VG = MDN->getOperand(0);
+    // The optimizer may optimize away a global entirely, in which case we
+    // cannot instrument access to it.
+    if (!VG)
+      continue;
+
+    GlobalVariable *G = cast<GlobalVariable>(VG);
+    DynamicallyInitializedGlobals.insert(G);
+  }
+}
+// Returns true if a global variable is initialized dynamically in this TU.
+bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
+  return DynamicallyInitializedGlobals.count(G);
+}
+
 void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
   bool IsWrite;
   Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
   assert(Addr);
-  if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
-    // We are accessing a global scalar variable. Nothing to catch here.
-    return;
+  if (ClOpt && ClOptGlobals) {
+    if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
+      // If initialization order checking is disabled, a simple access to a
+      // dynamically initialized global is always valid.
+      if (!ClInitializers)
+        return;
+      // If a global variable does not have dynamic initialization we don't
+      // have to instrument it.  However, if a global has external linkage, we
+      // assume it has dynamic initialization, as it may have an initializer
+      // in a different TU.
+      if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+          !HasDynamicInitializer(G))
+        return;
+    }
   }
+
   Type *OrigPtrTy = Addr->getType();
   Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
 
@@ -462,68 +507,106 @@
   Crash->setDebugLoc(OrigIns->getDebugLoc());
 }
 
-// This function replaces all global variables with new variables that have
-// trailing redzones. It also creates a function that poisons
-// redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizer::insertGlobalRedzones(Module &M) {
-  SmallVector<GlobalVariable *, 16> GlobalsToChange;
+void AddressSanitizer::createInitializerPoisonCalls(Module &M,
+                                                    Value *FirstAddr,
+                                                    Value *LastAddr) {
+  // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
+  Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+  // If that function is not present, this TU contains no globals, or they have
+  // all been optimized away
+  if (!GlobalInit)
+    return;
 
-  for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
-       E = M.getGlobalList().end(); G != E; ++G) {
-    Type *Ty = cast<PointerType>(G->getType())->getElementType();
-    DEBUG(dbgs() << "GLOBAL: " << *G);
-
-    if (!Ty->isSized()) continue;
-    if (!G->hasInitializer()) continue;
-    // Touch only those globals that will not be defined in other modules.
-    // Don't handle ODR type linkages since other modules may be built w/o asan.
-    if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
-        G->getLinkage() != GlobalVariable::PrivateLinkage &&
-        G->getLinkage() != GlobalVariable::InternalLinkage)
-      continue;
-    // Two problems with thread-locals:
-    //   - The address of the main thread's copy can't be computed at link-time.
-    //   - Need to poison all copies, not just the main thread's one.
-    if (G->isThreadLocal())
-      continue;
-    // For now, just ignore this Alloca if the alignment is large.
-    if (G->getAlignment() > RedzoneSize) continue;
+  // Set up the arguments to our poison/unpoison functions.
+  IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
 
-    // Ignore all the globals with the names starting with "\01L_OBJC_".
-    // Many of those are put into the .cstring section. The linker compresses
-    // that section by removing the spare \0s after the string terminator, so
-    // our redzones get broken.
-    if ((G->getName().find("\01L_OBJC_") == 0) ||
-        (G->getName().find("\01l_OBJC_") == 0)) {
-      DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
-      continue;
+  // Declare our poisoning and unpoisoning functions.
+  Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+  AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+  Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+  AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+
+  // Add a call to poison all external globals before the given function starts.
+  IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+
+  // Add calls to unpoison all globals before each return instruction.
+  for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
+      I != E; ++I) {
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+      CallInst::Create(AsanUnpoisonGlobals, "", RI);
     }
+  }
+}
 
-    if (G->hasSection()) {
-      StringRef Section(G->getSection());
-      // Ignore the globals from the __OBJC section. The ObjC runtime assumes
-      // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
-      // them.
-      if ((Section.find("__OBJC,") == 0) ||
-          (Section.find("__DATA, __objc_") == 0)) {
-        DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
-        continue;
-      }
-      // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
-      // Constant CFString instances are compiled in the following way:
-      //  -- the string buffer is emitted into
-      //     __TEXT,__cstring,cstring_literals
-      //  -- the constant NSConstantString structure referencing that buffer
-      //     is placed into __DATA,__cfstring
-      // Therefore there's no point in placing redzones into __DATA,__cfstring.
-      // Moreover, it causes the linker to crash on OS X 10.7
-      if (Section.find("__DATA,__cfstring") == 0) {
-        DEBUG(dbgs() << "Ignoring CFString: " << *G);
-        continue;
-      }
+bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+  Type *Ty = cast<PointerType>(G->getType())->getElementType();
+  DEBUG(dbgs() << "GLOBAL: " << *G);
+
+  if (!Ty->isSized()) return false;
+  if (!G->hasInitializer()) return false;
+  // Touch only those globals that will not be defined in other modules.
+  // Don't handle ODR type linkages since other modules may be built w/o asan.
+  if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+      G->getLinkage() != GlobalVariable::PrivateLinkage &&
+      G->getLinkage() != GlobalVariable::InternalLinkage)
+    return false;
+  // Two problems with thread-locals:
+  //   - The address of the main thread's copy can't be computed at link-time.
+  //   - Need to poison all copies, not just the main thread's one.
+  if (G->isThreadLocal())
+    return false;
+  // For now, just ignore this Alloca if the alignment is large.
+  if (G->getAlignment() > RedzoneSize) return false;
+
+  // Ignore all the globals with the names starting with "\01L_OBJC_".
+  // Many of those are put into the .cstring section. The linker compresses
+  // that section by removing the spare \0s after the string terminator, so
+  // our redzones get broken.
+  if ((G->getName().find("\01L_OBJC_") == 0) ||
+      (G->getName().find("\01l_OBJC_") == 0)) {
+    DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+    return false;
+  }
+
+  if (G->hasSection()) {
+    StringRef Section(G->getSection());
+    // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+    // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+    // them.
+    if ((Section.find("__OBJC,") == 0) ||
+        (Section.find("__DATA, __objc_") == 0)) {
+      DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+      return false;
+    }
+    // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+    // Constant CFString instances are compiled in the following way:
+    //  -- the string buffer is emitted into
+    //     __TEXT,__cstring,cstring_literals
+    //  -- the constant NSConstantString structure referencing that buffer
+    //     is placed into __DATA,__cfstring
+    // Therefore there's no point in placing redzones into __DATA,__cfstring.
+    // Moreover, it causes the linker to crash on OS X 10.7
+    if (Section.find("__DATA,__cfstring") == 0) {
+      DEBUG(dbgs() << "Ignoring CFString: " << *G);
+      return false;
     }
+  }
+
+  return true;
+}
 
-    GlobalsToChange.push_back(G);
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+bool AddressSanitizer::insertGlobalRedzones(Module &M) {
+  SmallVector<GlobalVariable *, 16> GlobalsToChange;
+
+  for (Module::GlobalListType::iterator G = M.global_begin(),
+       E = M.global_end(); G != E; ++G) {
+    if (ShouldInstrumentGlobal(G))
+      GlobalsToChange.push_back(G);
   }
 
   size_t n = GlobalsToChange.size();
@@ -534,13 +617,22 @@
   //   size_t size;
   //   size_t size_with_redzone;
   //   const char *name;
+  //   size_t has_dynamic_init;
   // We initialize an array of such structures and pass it to a run-time call.
   StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
-                                               IntptrTy, IntptrTy, NULL);
-  SmallVector<Constant *, 16> Initializers(n);
+                                               IntptrTy, IntptrTy,
+                                               IntptrTy, NULL);
+  SmallVector<Constant *, 16> Initializers(n), DynamicInit;
 
   IRBuilder<> IRB(CtorInsertBefore);
 
+  if (ClInitializers)
+    FindDynamicInitializers(M);
+
+  // The addresses of the first and last dynamically initialized globals in
+  // this TU.  Used in initialization order checking.
+  Value *FirstDynamic = 0, *LastDynamic = 0;
+
   for (size_t i = 0; i < n; i++) {
     GlobalVariable *G = GlobalsToChange[i];
     PointerType *PtrTy = cast<PointerType>(G->getType());
@@ -549,6 +641,8 @@
     uint64_t RightRedzoneSize = RedzoneSize +
         (RedzoneSize - (SizeInBytes % RedzoneSize));
     Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+    // Determine whether this global should be poisoned in initialization.
+    bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
 
     StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
     Constant *NewInitializer = ConstantStruct::get(
@@ -583,7 +677,16 @@
         ConstantInt::get(IntptrTy, SizeInBytes),
         ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
         ConstantExpr::getPointerCast(Name, IntptrTy),
+        ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
         NULL);
+
+    // Populate the first and last globals declared in this TU.
+    if (ClInitializers && GlobalHasDynamicInitializer) {
+      LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
+      if (FirstDynamic == 0)
+        FirstDynamic = LastDynamic;
+    }
+
     DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
   }
 
@@ -592,8 +695,13 @@
       M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
       ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
 
+  // Create calls for poisoning before initializers run and unpoisoning after.
+  if (ClInitializers && FirstDynamic && LastDynamic)
+    createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+
   Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
-      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+      kAsanRegisterGlobalsName, IRB.getVoidTy(),
+      IntptrTy, IntptrTy, NULL));
   AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
 
   IRB.CreateCall2(AsanRegisterGlobals,

Added: llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll?rev=162268&view=auto
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll (added)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll Tue Aug 21 03:24:25 2012
@@ -0,0 +1,36 @@
+; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+ at xxx = global i32 0, align 4
+; Clang will emit the following metadata identifying @xxx as dynamically
+; initialized.
+!0 = metadata !{i32* @xxx}
+!llvm.asan.dynamically_initialized_globals = !{!0}
+
+define i32 @initializer() uwtable {
+entry:
+  ret i32 42
+}
+
+define internal void @__cxx_global_var_init() section ".text.startup" {
+entry:
+  %call = call i32 @initializer()
+  store i32 %call, i32* @xxx, align 4
+  ret void
+}
+
+define internal void @_GLOBAL__I_a() address_safety section ".text.startup" {
+entry:
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+; Clang indicated that @xxx was dynamically initailized.
+; __asan_{before,after}_dynamic_init should be called from _GLOBAL__I_a
+
+; CHECK: define internal void @_GLOBAL__I_a
+; CHECK-NOT: ret
+; CHECK: call void @__asan_before_dynamic_init
+; CHECK: call void @__cxx_global_var_init
+; CHECK: call void @__asan_after_dynamic_init
+; CHECK: ret





More information about the llvm-commits mailing list