[llvm] r212188 - [ASan] Print exact source location of global variables in error reports.

Alexey Samsonov vonosmas at gmail.com
Wed Jul 2 09:54:42 PDT 2014


Author: samsonov
Date: Wed Jul  2 11:54:41 2014
New Revision: 212188

URL: http://llvm.org/viewvc/llvm-project?rev=212188&view=rev
Log:
[ASan] Print exact source location of global variables in error reports.

See https://code.google.com/p/address-sanitizer/issues/detail?id=299 for the
original feature request.

Introduce llvm.asan.globals metadata, which Clang (or any other frontend)
may use to report extra information about global variables to the ASan
instrumentation pass in the backend. This metadata replaces
llvm.asan.dynamically_initialized_globals that was used to detect init-order
bugs. llvm.asan.globals contains the following data for each global:
  1) source location (file/line/column info);
  2) whether it is dynamically initialized;
  3) whether it is blacklisted (shouldn't be instrumented).

Source location data is then emitted in the binary and can be picked up
by the ASan runtime in case it needs to print an error report involving a global.
For example:

  0x... is located 4 bytes to the right of global variable 'C::array' defined in '/path/to/file:17:8' (0x...) of size 40

These source locations are printed even if the binary doesn't have any
debug info.

This is an ABI-breaking change. ASan initialization is renamed to
__asan_init_v4(). Pre-built libraries compiled with older Clang will not work
with the fresh runtime.

Added:
    llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll
Modified:
    llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
    llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll
    llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll

Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=212188&r1=212187&r2=212188&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Wed Jul  2 11:54:41 2014
@@ -16,6 +16,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
@@ -79,7 +80,7 @@ static const char *const kAsanUnregister
     "__asan_unregister_globals";
 static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
 static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanInitName = "__asan_init_v4";
 static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init";
 static const char *const kAsanCovName = "__sanitizer_cov";
 static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
@@ -215,28 +216,86 @@ STATISTIC(NumOptimizedAccessesToGlobalVa
           "Number of optimized accesses to global vars");
 
 namespace {
-/// A set of dynamically initialized globals extracted from metadata.
-class SetOfDynamicallyInitializedGlobals {
+/// Frontend-provided metadata for global variables.
+class GlobalsMetadata {
  public:
-  void Init(Module& M) {
-    // Clang generates metadata identifying all dynamically initialized globals.
-    NamedMDNode *DynamicGlobals =
-        M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
-    if (!DynamicGlobals)
+  void init(Module& M) {
+    assert(!inited_);
+    inited_ = true;
+    NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+    if (!Globals)
       return;
-    for (const auto MDN : DynamicGlobals->operands()) {
-      assert(MDN->getNumOperands() == 1);
-      Value *VG = MDN->getOperand(0);
-      // The optimizer may optimize away a global entirely, in which case we
-      // cannot instrument access to it.
-      if (!VG)
+    for (auto MDN : Globals->operands()) {
+      // Format of the metadata node for the global:
+      // {
+      //   global,
+      //   source_location,
+      //   i1 is_dynamically_initialized,
+      //   i1 is_blacklisted
+      // }
+      assert(MDN->getNumOperands() == 4);
+      Value *V = MDN->getOperand(0);
+      // The optimizer may optimize away a global entirely.
+      if (!V)
         continue;
-      DynInitGlobals.insert(cast<GlobalVariable>(VG));
+      GlobalVariable *GV = cast<GlobalVariable>(V);
+      if (Value *Loc = MDN->getOperand(1)) {
+        GlobalVariable *GVLoc = cast<GlobalVariable>(Loc);
+        // We may already know the source location for GV, if it was merged
+        // with another global.
+        if (SourceLocation.insert(std::make_pair(GV, GVLoc)).second)
+          addSourceLocationGlobal(GVLoc);
+      }
+      ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(2));
+      if (IsDynInit->isOne())
+        DynInitGlobals.insert(GV);
+      ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(3));
+      if (IsBlacklisted->isOne())
+        BlacklistedGlobals.insert(GV);
     }
   }
-  bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; }
+
+  GlobalVariable *getSourceLocation(GlobalVariable *G) const {
+    auto Pos = SourceLocation.find(G);
+    return (Pos != SourceLocation.end()) ? Pos->second : nullptr;
+  }
+
+  /// Check if the global is dynamically initialized.
+  bool isDynInit(GlobalVariable *G) const {
+    return DynInitGlobals.count(G);
+  }
+
+  /// Check if the global was blacklisted.
+  bool isBlacklisted(GlobalVariable *G) const {
+    return BlacklistedGlobals.count(G);
+  }
+
+  /// Check if the global was generated to describe source location of another
+  /// global (we don't want to instrument them).
+  bool isSourceLocationGlobal(GlobalVariable *G) const {
+    return LocationGlobals.count(G);
+  }
+
  private:
-  SmallSet<GlobalValue*, 32> DynInitGlobals;
+  bool inited_ = false;
+  DenseMap<GlobalVariable*, GlobalVariable*> SourceLocation;
+  DenseSet<GlobalVariable*> DynInitGlobals;
+  DenseSet<GlobalVariable*> BlacklistedGlobals;
+  DenseSet<GlobalVariable*> LocationGlobals;
+
+  void addSourceLocationGlobal(GlobalVariable *SourceLocGV) {
+    // Source location global is a struct with layout:
+    // {
+    //    filename,
+    //    i32 line_number,
+    //    i32 column_number,
+    // }
+    LocationGlobals.insert(SourceLocGV);
+    ConstantStruct *Contents =
+        cast<ConstantStruct>(SourceLocGV->getInitializer());
+    GlobalVariable *FilenameGV = cast<GlobalVariable>(Contents->getOperand(0));
+    LocationGlobals.insert(FilenameGV);
+  }
 };
 
 /// This struct defines the shadow mapping using the rule:
@@ -351,7 +410,7 @@ struct AddressSanitizer : public Functio
            *AsanMemoryAccessCallbackSized[2];
   Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
   InlineAsm *EmptyAsm;
-  SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+  GlobalsMetadata GlobalsMD;
 
   friend struct FunctionStackPoisoner;
 };
@@ -381,7 +440,7 @@ class AddressSanitizerModule : public Mo
   SmallString<64> BlacklistFile;
 
   std::unique_ptr<SpecialCaseList> BL;
-  SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+  GlobalsMetadata GlobalsMD;
   Type *IntptrTy;
   LLVMContext *C;
   const DataLayout *DL;
@@ -659,7 +718,7 @@ bool AddressSanitizer::GlobalIsLinkerIni
   // If a global variable does not have dynamic initialization we don't
   // have to instrument it.  However, if a global does not have initializer
   // at all, we assume it has dynamic initializer (in other TU).
-  return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+  return G->hasInitializer() && !GlobalsMD.isDynInit(G);
 }
 
 void
@@ -866,7 +925,11 @@ bool AddressSanitizerModule::ShouldInstr
   Type *Ty = cast<PointerType>(G->getType())->getElementType();
   DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
 
+  // FIXME: Don't use the blacklist here, all the data should be collected
+  // by the frontend and passed in globals metadata.
   if (BL->isIn(*G)) return false;
+  if (GlobalsMD.isBlacklisted(G)) return false;
+  if (GlobalsMD.isSourceLocationGlobal(G)) return false;
   if (!Ty->isSized()) return false;
   if (!G->hasInitializer()) return false;
   if (GlobalWasGeneratedByAsan(G)) return false;  // Our own global.
@@ -967,7 +1030,7 @@ void AddressSanitizerModule::initializeC
 // trailing redzones. It also creates a function that poisons
 // redzones and inserts this function into llvm.global_ctors.
 bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
-  DynamicallyInitializedGlobals.Init(M);
+  GlobalsMD.init(M);
 
   SmallVector<GlobalVariable *, 16> GlobalsToChange;
 
@@ -986,10 +1049,11 @@ bool AddressSanitizerModule::InstrumentG
   //   const char *name;
   //   const char *module_name;
   //   size_t has_dynamic_init;
+  //   void *source_location;
   // We initialize an array of such structures and pass it to a run-time call.
-  StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
-                                               IntptrTy, IntptrTy,
-                                               IntptrTy, IntptrTy, NULL);
+  StructType *GlobalStructTy =
+      StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
+                      IntptrTy, IntptrTy, NULL);
   SmallVector<Constant *, 16> Initializers(n);
 
   bool HasDynamicallyInitializedGlobals = false;
@@ -1017,9 +1081,6 @@ bool AddressSanitizerModule::InstrumentG
       RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
     assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
     Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
-    // Determine whether this global should be poisoned in initialization.
-    bool GlobalHasDynamicInitializer =
-        DynamicallyInitializedGlobals.Contains(G);
 
     StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
     Constant *NewInitializer = ConstantStruct::get(
@@ -1048,17 +1109,20 @@ bool AddressSanitizerModule::InstrumentG
     NewGlobal->takeName(G);
     G->eraseFromParent();
 
+    bool GlobalHasDynamicInitializer = GlobalsMD.isDynInit(G);
+    GlobalVariable *SourceLoc = GlobalsMD.getSourceLocation(G);
+
     Initializers[i] = ConstantStruct::get(
-        GlobalStructTy,
-        ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+        GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
         ConstantInt::get(IntptrTy, SizeInBytes),
         ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
         ConstantExpr::getPointerCast(Name, IntptrTy),
         ConstantExpr::getPointerCast(ModuleName, IntptrTy),
         ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
+        SourceLoc ? ConstantExpr::getPointerCast(SourceLoc, IntptrTy)
+                  : ConstantInt::get(IntptrTy, 0),
         NULL);
 
-    // Populate the first and last globals declared in this TU.
     if (ClInitializers && GlobalHasDynamicInitializer)
       HasDynamicallyInitializedGlobals = true;
 
@@ -1186,7 +1250,7 @@ bool AddressSanitizer::doInitialization(
     report_fatal_error("data layout missing");
   DL = &DLP->getDataLayout();
 
-  DynamicallyInitializedGlobals.Init(M);
+  GlobalsMD.init(M);
 
   C = &(M.getContext());
   LongSize = DL->getPointerSizeInBits();

Added: llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll?rev=212188&view=auto
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll (added)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/global_metadata.ll Wed Jul  2 11:54:41 2014
@@ -0,0 +1,63 @@
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Globals:
+ at global = global i32 0, align 4
+ at dyn_init_global = global i32 0, align 4
+ at blacklisted_global = global i32 0, align 4
+ at _ZZ4funcvE10static_var = internal global i32 0, align 4
+ at .str = private unnamed_addr constant [14 x i8] c"Hello, world!\00", align 1
+ at llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_asan_globals.cpp, i8* null }]
+
+; Sanitizer location descriptors:
+ at .str1 = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1
+ at .asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 }
+ at .asan_loc_descr1 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 7, i32 5 }
+ at .asan_loc_descr2 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 12, i32 14 }
+ at .asan_loc_descr4 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 14, i32 25 }
+
+; Check that globals were instrumented, but sanitizer location descriptors weren't:
+; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, align 32
+; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, align 32
+; CHECK: @.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 }
+
+; Check that location descriptors were passed into __asan_register_globals:
+; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* @.asan_loc_descr to i64)
+
+; Function Attrs: nounwind sanitize_address
+define internal void @__cxx_global_var_init() #0 section ".text.startup" {
+entry:
+  %0 = load i32* @global, align 4
+  store i32 %0, i32* @dyn_init_global, align 4
+  ret void
+}
+
+; Function Attrs: nounwind sanitize_address
+define void @_Z4funcv() #1 {
+entry:
+  %literal = alloca i8*, align 8
+  store i8* getelementptr inbounds ([14 x i8]* @.str, i32 0, i32 0), i8** %literal, align 8
+  ret void
+}
+
+; Function Attrs: nounwind sanitize_address
+define internal void @_GLOBAL__sub_I_asan_globals.cpp() #0 section ".text.startup" {
+entry:
+  call void @__cxx_global_var_init()
+  ret void
+}
+
+attributes #0 = { nounwind sanitize_address }
+attributes #1 = { nounwind sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.asan.globals = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!5}
+
+!0 = metadata !{i32* @global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr, i1 false, i1 false}
+!1 = metadata !{i32* @dyn_init_global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr1, i1 true, i1 false}
+!2 = metadata !{i32* @blacklisted_global, null, i1 false, i1 true}
+!3 = metadata !{i32* @_ZZ4funcvE10static_var, { [22 x i8]*, i32, i32 }* @.asan_loc_descr2, i1 false, i1 false}
+!4 = metadata !{[14 x i8]* @.str, { [22 x i8]*, i32, i32 }* @.asan_loc_descr4, i1 false, i1 false}
+!5 = metadata !{metadata !"clang version 3.5.0 (211282)"}

Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll?rev=212188&r1=212187&r2=212188&view=diff
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll (original)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_global.ll Wed Jul  2 11:54:41 2014
@@ -68,8 +68,8 @@ entry:
 }
 
 
-!llvm.asan.dynamically_initialized_globals = !{!0}
-!0 = metadata !{[10 x i32]* @GlobDy}
+!llvm.asan.globals = !{!0}
+!0 = metadata !{[10 x i32]* @GlobDy, null, i1 true, i1 false}
 
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret

Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll?rev=212188&r1=212187&r2=212188&view=diff
==============================================================================
--- llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll (original)
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll Wed Jul  2 11:54:41 2014
@@ -7,9 +7,11 @@ target triple = "x86_64-unknown-linux-gn
 @YYY = global i32 0, align 4           ; W/o dynamic initializer.
 ; Clang will emit the following metadata identifying @xxx as dynamically
 ; initialized.
-!0 = metadata !{i32* @xxx}
-!1 = metadata !{i32* @XXX}
-!llvm.asan.dynamically_initialized_globals = !{!0, !1}
+!0 = metadata !{i32* @xxx, null, i1 true, i1 false}
+!1 = metadata !{i32* @XXX, null, i1 true, i1 false}
+!2 = metadata !{i32* @yyy, null, i1 false, i1 false}
+!3 = metadata !{i32* @YYY, null, i1 false, i1 false}
+!llvm.asan.globals = !{!0, !1, !2, !3}
 
 define i32 @initializer() uwtable {
 entry:





More information about the llvm-commits mailing list