[llvm] r368102 - hwasan: Instrument globals.

Peter Collingbourne via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 15:07:30 PDT 2019


Author: pcc
Date: Tue Aug  6 15:07:29 2019
New Revision: 368102

URL: http://llvm.org/viewvc/llvm-project?rev=368102&view=rev
Log:
hwasan: Instrument globals.

Globals are instrumented by adding a pointer tag to their symbol values
and emitting metadata into a special section that allows the runtime to tag
their memory when the library is loaded.

Due to order of initialization issues explained in more detail in the comments,
shadow initialization cannot happen during regular global initialization.
Instead, the location of the global section is marked using an ELF note,
and we require libc support for calling a function provided by the HWASAN
runtime when libraries are loaded and unloaded.

Based on ideas discussed with @evgeny777 in D56672.

Differential Revision: https://reviews.llvm.org/D65770

Added:
    llvm/trunk/test/Instrumentation/HWAddressSanitizer/globals.ll
Modified:
    llvm/trunk/include/llvm/BinaryFormat/ELF.h
    llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp

Modified: llvm/trunk/include/llvm/BinaryFormat/ELF.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/BinaryFormat/ELF.h?rev=368102&r1=368101&r2=368102&view=diff
==============================================================================
--- llvm/trunk/include/llvm/BinaryFormat/ELF.h (original)
+++ llvm/trunk/include/llvm/BinaryFormat/ELF.h Tue Aug  6 15:07:29 2019
@@ -1416,6 +1416,11 @@ enum : unsigned {
   NT_SIGINFO = 0x53494749,
 };
 
+// LLVM-specific notes.
+enum {
+  NT_LLVM_HWASAN_GLOBALS = 3,
+};
+
 // GNU note types
 enum {
   NT_GNU_ABI_TAG = 1,

Modified: llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp?rev=368102&r1=368101&r2=368102&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp Tue Aug  6 15:07:29 2019
@@ -16,6 +16,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -52,6 +53,7 @@ using namespace llvm;
 #define DEBUG_TYPE "hwasan"
 
 static const char *const kHwasanModuleCtorName = "hwasan.module_ctor";
+static const char *const kHwasanNoteName = "hwasan.note";
 static const char *const kHwasanInitName = "__hwasan_init";
 
 static const char *const kHwasanShadowMemoryDynamicAddress =
@@ -112,6 +114,9 @@ static cl::opt<bool> ClGenerateTagsWithC
     cl::desc("generate new tags with runtime library calls"), cl::Hidden,
     cl::init(false));
 
+static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
+                               cl::Hidden, cl::init(false));
+
 static cl::opt<int> ClMatchAllTag(
     "hwasan-match-all-tag",
     cl::desc("don't report bad accesses via pointers with this tag"),
@@ -169,16 +174,16 @@ namespace {
 class HWAddressSanitizer {
 public:
   explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
-                              bool Recover = false) {
+                              bool Recover = false) : M(M) {
     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
     this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
         ClEnableKhwasan : CompileKernel;
 
-    initializeModule(M);
+    initializeModule();
   }
 
   bool sanitizeFunction(Function &F);
-  void initializeModule(Module &M);
+  void initializeModule();
 
   void initializeCallbacks(Module &M);
 
@@ -216,8 +221,12 @@ public:
   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
   void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
 
+  void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
+  void instrumentGlobals();
+
 private:
   LLVMContext *C;
+  Module &M;
   Triple TargetTriple;
   FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
   FunctionCallee HWAsanHandleVfork;
@@ -237,7 +246,7 @@ private:
     bool InTls;
 
     void init(Triple &TargetTriple);
-    unsigned getAllocaAlignment() const { return 1U << Scale; }
+    unsigned getObjectAlignment() const { return 1U << Scale; }
   };
   ShadowMapping Mapping;
 
@@ -245,6 +254,7 @@ private:
   Type *Int8PtrTy;
   Type *Int8Ty;
   Type *Int32Ty;
+  Type *Int64Ty = Type::getInt64Ty(M.getContext());
 
   bool CompileKernel;
   bool Recover;
@@ -332,7 +342,7 @@ PreservedAnalyses HWAddressSanitizerPass
 /// Module-level initialization.
 ///
 /// inserts a call to __hwasan_init to the module's constructor list.
-void HWAddressSanitizer::initializeModule(Module &M) {
+void HWAddressSanitizer::initializeModule() {
   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
   auto &DL = M.getDataLayout();
 
@@ -361,6 +371,16 @@ void HWAddressSanitizer::initializeModul
               Ctor->setComdat(CtorComdat);
               appendToGlobalCtors(M, Ctor, 0, Ctor);
             });
+
+    // Older versions of Android do not have the required runtime support for
+    // global instrumentation. On other platforms we currently require using the
+    // latest version of the runtime.
+    bool InstrumentGlobals =
+        !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
+    if (ClGlobals.getNumOccurrences())
+      InstrumentGlobals = ClGlobals;
+    if (InstrumentGlobals)
+      instrumentGlobals();
   }
 
   if (!TargetTriple.isAndroid()) {
@@ -716,7 +736,7 @@ static uint64_t getAllocaSizeInBytes(con
 
 bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
                                    Value *Tag, size_t Size) {
-  size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
+  size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
 
   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
   if (ClInstrumentWithCalls) {
@@ -736,7 +756,7 @@ bool HWAddressSanitizer::tagAlloca(IRBui
       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
     if (Size != AlignedSize) {
       IRB.CreateStore(
-          ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()),
+          ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
@@ -1018,7 +1038,7 @@ bool HWAddressSanitizer::instrumentStack
 
       // Re-tag alloca memory with the special UAR tag.
       Value *Tag = getUARTag(IRB, StackTag);
-      tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment()));
+      tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
     }
   }
 
@@ -1116,8 +1136,9 @@ bool HWAddressSanitizer::sanitizeFunctio
   DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
   for (AllocaInst *AI : AllocasToInstrument) {
     uint64_t Size = getAllocaSizeInBytes(*AI);
-    uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
-    AI->setAlignment(std::max(AI->getAlignment(), 16u));
+    uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+    AI->setAlignment(
+        std::max(AI->getAlignment(), Mapping.getObjectAlignment()));
     if (Size != AlignedSize) {
       Type *AllocatedType = AI->getAllocatedType();
       if (AI->isArrayAllocation()) {
@@ -1177,6 +1198,194 @@ bool HWAddressSanitizer::sanitizeFunctio
   return Changed;
 }
 
+void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
+  Constant *Initializer = GV->getInitializer();
+  uint64_t SizeInBytes =
+      M.getDataLayout().getTypeAllocSize(Initializer->getType());
+  uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
+  if (SizeInBytes != NewSize) {
+    // Pad the initializer out to the next multiple of 16 bytes and add the
+    // required short granule tag.
+    std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
+    Init.back() = Tag;
+    Constant *Padding = ConstantDataArray::get(*C, Init);
+    Initializer = ConstantStruct::getAnon({Initializer, Padding});
+  }
+
+  auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
+                                   GlobalValue::ExternalLinkage, Initializer,
+                                   GV->getName() + ".hwasan");
+  NewGV->copyAttributesFrom(GV);
+  NewGV->setLinkage(GlobalValue::PrivateLinkage);
+  NewGV->copyMetadata(GV, 0);
+  NewGV->setAlignment(
+      std::max(GV->getAlignment(), Mapping.getObjectAlignment()));
+
+  // It is invalid to ICF two globals that have different tags. In the case
+  // where the size of the global is a multiple of the tag granularity the
+  // contents of the globals may be the same but the tags (i.e. symbol values)
+  // may be different, and the symbols are not considered during ICF. In the
+  // case where the size is not a multiple of the granularity, the short granule
+  // tags would discriminate two globals with different tags, but there would
+  // otherwise be nothing stopping such a global from being incorrectly ICF'd
+  // with an uninstrumented (i.e. tag 0) global that happened to have the short
+  // granule tag in the last byte.
+  NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+
+  // Descriptor format (assuming little-endian):
+  // bytes 0-3: relative address of global
+  // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
+  // it isn't, we create multiple descriptors)
+  // byte 7: tag
+  auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
+  const uint64_t MaxDescriptorSize = 0xfffff0;
+  for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
+       DescriptorPos += MaxDescriptorSize) {
+    auto *Descriptor =
+        new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
+                           nullptr, GV->getName() + ".hwasan.descriptor");
+    auto *GVRelPtr = ConstantExpr::getTrunc(
+        ConstantExpr::getAdd(
+            ConstantExpr::getSub(
+                ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+                ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
+            ConstantInt::get(Int64Ty, DescriptorPos)),
+        Int32Ty);
+    uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
+    auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
+    Descriptor->setComdat(NewGV->getComdat());
+    Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
+    Descriptor->setSection("hwasan_globals");
+    Descriptor->setMetadata(LLVMContext::MD_associated,
+                            MDNode::get(*C, ValueAsMetadata::get(NewGV)));
+    appendToCompilerUsed(M, Descriptor);
+  }
+
+  Constant *Aliasee = ConstantExpr::getIntToPtr(
+      ConstantExpr::getAdd(
+          ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+          ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)),
+      GV->getType());
+  auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
+                                    GV->getLinkage(), "", Aliasee, &M);
+  Alias->setVisibility(GV->getVisibility());
+  Alias->takeName(GV);
+  GV->replaceAllUsesWith(Alias);
+  GV->eraseFromParent();
+}
+
+void HWAddressSanitizer::instrumentGlobals() {
+  // Start by creating a note that contains pointers to the list of global
+  // descriptors. Adding a note to the output file will cause the linker to
+  // create a PT_NOTE program header pointing to the note that we can use to
+  // find the descriptor list starting from the program headers. A function
+  // provided by the runtime initializes the shadow memory for the globals by
+  // accessing the descriptor list via the note. The dynamic loader needs to
+  // call this function whenever a library is loaded.
+  //
+  // The reason why we use a note for this instead of a more conventional
+  // approach of having a global constructor pass a descriptor list pointer to
+  // the runtime is because of an order of initialization problem. With
+  // constructors we can encounter the following problematic scenario:
+  //
+  // 1) library A depends on library B and also interposes one of B's symbols
+  // 2) B's constructors are called before A's (as required for correctness)
+  // 3) during construction, B accesses one of its "own" globals (actually
+  //    interposed by A) and triggers a HWASAN failure due to the initialization
+  //    for A not having happened yet
+  //
+  // Even without interposition it is possible to run into similar situations in
+  // cases where two libraries mutually depend on each other.
+  //
+  // We only need one note per binary, so put everything for the note in a
+  // comdat.
+  Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName);
+
+  Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
+  auto Start =
+      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+                         nullptr, "__start_hwasan_globals");
+  Start->setVisibility(GlobalValue::HiddenVisibility);
+  Start->setDSOLocal(true);
+  auto Stop =
+      new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+                         nullptr, "__stop_hwasan_globals");
+  Stop->setVisibility(GlobalValue::HiddenVisibility);
+  Stop->setDSOLocal(true);
+
+  // Null-terminated so actually 8 bytes, which are required in order to align
+  // the note properly.
+  auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
+
+  auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
+                                 Int32Ty, Int32Ty);
+  auto *Note =
+      new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true,
+                         GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
+  Note->setSection(".note.hwasan.globals");
+  Note->setComdat(NoteComdat);
+  Note->setAlignment(4);
+  Note->setDSOLocal(true);
+
+  // The pointers in the note need to be relative so that the note ends up being
+  // placed in rodata, which is the standard location for notes.
+  auto CreateRelPtr = [&](Constant *Ptr) {
+    return ConstantExpr::getTrunc(
+        ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
+                             ConstantExpr::getPtrToInt(Note, Int64Ty)),
+        Int32Ty);
+  };
+  Note->setInitializer(ConstantStruct::getAnon(
+      {ConstantInt::get(Int32Ty, 8),                           // n_namesz
+       ConstantInt::get(Int32Ty, 8),                           // n_descsz
+       ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
+       Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
+  appendToCompilerUsed(M, Note);
+
+  // Create a zero-length global in hwasan_globals so that the linker will
+  // always create start and stop symbols.
+  auto Dummy = new GlobalVariable(
+      M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
+      Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
+  Dummy->setSection("hwasan_globals");
+  Dummy->setComdat(NoteComdat);
+  Dummy->setMetadata(LLVMContext::MD_associated,
+                     MDNode::get(*C, ValueAsMetadata::get(Note)));
+  appendToCompilerUsed(M, Dummy);
+
+  std::vector<GlobalVariable *> Globals;
+  for (GlobalVariable &GV : M.globals()) {
+    if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
+        GV.isThreadLocal())
+      continue;
+
+    // Common symbols can't have aliases point to them, so they can't be tagged.
+    if (GV.hasCommonLinkage())
+      continue;
+
+    // Globals with custom sections may be used in __start_/__stop_ enumeration,
+    // which would be broken both by adding tags and potentially by the extra
+    // padding/alignment that we insert.
+    if (GV.hasSection())
+      continue;
+
+    Globals.push_back(&GV);
+  }
+
+  MD5 Hasher;
+  Hasher.update(M.getSourceFileName());
+  MD5::MD5Result Hash;
+  Hasher.final(Hash);
+  uint8_t Tag = Hash[0];
+
+  for (GlobalVariable *GV : Globals) {
+    // Skip tag 0 in order to avoid collisions with untagged memory.
+    if (Tag == 0)
+      Tag = 1;
+    instrumentGlobal(GV, Tag++);
+  }
+}
+
 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
   Scale = kDefaultShadowScale;
   if (ClMappingOffset.getNumOccurrences() > 0) {

Added: llvm/trunk/test/Instrumentation/HWAddressSanitizer/globals.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/HWAddressSanitizer/globals.ll?rev=368102&view=auto
==============================================================================
--- llvm/trunk/test/Instrumentation/HWAddressSanitizer/globals.ll (added)
+++ llvm/trunk/test/Instrumentation/HWAddressSanitizer/globals.ll Tue Aug  6 15:07:29 2019
@@ -0,0 +1,37 @@
+; RUN: opt < %s -S -hwasan -mtriple=aarch64--linux-android29 | FileCheck --check-prefix=CHECK29 %s
+; RUN: opt < %s -S -hwasan -mtriple=aarch64--linux-android30 | FileCheck --check-prefix=CHECK30 %s
+
+; CHECK29-NOT: @hwasan.note
+; CHECK29: @four = global
+
+; CHECK30: @__start_hwasan_globals = external hidden constant [0 x i8]
+; CHECK30: @__stop_hwasan_globals = external hidden constant [0 x i8]
+
+; CHECK30: @hwasan.note = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint ([0 x i8]* @__start_hwasan_globals to i64), i64 ptrtoint ({ i32, i32, i32, [8 x i8], i32, i32 }* @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint ([0 x i8]* @__stop_hwasan_globals to i64), i64 ptrtoint ({ i32, i32, i32, [8 x i8], i32, i32 }* @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat, align 4
+
+; CHECK30: @hwasan.dummy.global = private constant [0 x i8] zeroinitializer, section "hwasan_globals", comdat($hwasan.note), !associated [[NOTE:![0-9]+]]
+
+; CHECK30: @four.hwasan = private global { i32, [12 x i8] } { i32 1, [12 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\AC" }, align 16
+; CHECK30: @four.hwasan.descriptor = private constant { i32, i32 } { i32 trunc (i64 sub (i64 ptrtoint ({ i32, [12 x i8] }* @four.hwasan to i64), i64 ptrtoint ({ i32, i32 }* @four.hwasan.descriptor to i64)) to i32), i32 -1409286140 }, section "hwasan_globals", !associated [[FOUR:![0-9]+]]
+
+; CHECK30: @sixteen.hwasan = private global [16 x i8] zeroinitializer, align 16
+; CHECK30: @sixteen.hwasan.descriptor = private constant { i32, i32 } { i32 trunc (i64 sub (i64 ptrtoint ([16 x i8]* @sixteen.hwasan to i64), i64 ptrtoint ({ i32, i32 }* @sixteen.hwasan.descriptor to i64)) to i32), i32 -1392508912 }, section "hwasan_globals", !associated [[SIXTEEN:![0-9]+]]
+
+; CHECK30: @huge.hwasan = private global [16777232 x i8] zeroinitializer, align 16
+; CHECK30: @huge.hwasan.descriptor = private constant { i32, i32 } { i32 trunc (i64 sub (i64 ptrtoint ([16777232 x i8]* @huge.hwasan to i64), i64 ptrtoint ({ i32, i32 }* @huge.hwasan.descriptor to i64)) to i32), i32 -1358954512 }, section "hwasan_globals", !associated [[HUGE:![0-9]+]]
+; CHECK30: @huge.hwasan.descriptor.1 = private constant { i32, i32 } { i32 trunc (i64 add (i64 sub (i64 ptrtoint ([16777232 x i8]* @huge.hwasan to i64), i64 ptrtoint ({ i32, i32 }* @huge.hwasan.descriptor.1 to i64)), i64 16777200) to i32), i32 -1375731680 }, section "hwasan_globals", !associated [[HUGE]]
+
+; CHECK30: @four = alias i32, inttoptr (i64 add (i64 ptrtoint ({ i32, [12 x i8] }* @four.hwasan to i64), i64 -6052837899185946624) to i32*)
+; CHECK30: @sixteen = alias [16 x i8], inttoptr (i64 add (i64 ptrtoint ([16 x i8]* @sixteen.hwasan to i64), i64 -5980780305148018688) to [16 x i8]*)
+; CHECK30: @huge = alias [16777232 x i8], inttoptr (i64 add (i64 ptrtoint ([16777232 x i8]* @huge.hwasan to i64), i64 -5908722711110090752) to [16777232 x i8]*)
+
+; CHECK30: [[NOTE]] = !{{{{}} i32, i32, i32, [8 x i8], i32, i32 }* @hwasan.note}
+; CHECK30: [[FOUR]] = !{{{{}} i32, [12 x i8] }* @four.hwasan}
+; CHECK30: [[SIXTEEN]] = !{[16 x i8]* @sixteen.hwasan}
+; CHECK30: [[HUGE]] = !{[16777232 x i8]* @huge.hwasan}
+
+source_filename = "foo"
+
+ at four = global i32 1
+ at sixteen = global [16 x i8] zeroinitializer
+ at huge = global [16777232 x i8] zeroinitializer




More information about the llvm-commits mailing list