On Mon, Jul 16, 2012 at 9:15 AM, Kostya Serebryany <span dir="ltr"><<a href="mailto:kcc@google.com" target="_blank" class="cremed">kcc@google.com</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: kcc<br>
Date: Mon Jul 16 11:15:40 2012<br>
New Revision: 160284<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=160284&view=rev" target="_blank" class="cremed">http://llvm.org/viewvc/llvm-project?rev=160284&view=rev</a><br>
Log:<br>
[asan] refactor instrumentation to allow merging the crash callbacks (not fully implemented yet, no functionality change except the BB order)<br></blockquote><div><br></div><div>Uh... BB order is incredibly sensitive and significant, as we have learned... =/ I'm beginning to wish we had kept my patch in now and built upon it, as I think its simplifications would have been nice here.</div>
<div><br></div><div>Should I go ahead and provide a patch that introduces probabilities so at least performance remains stable as the basic block order shifts?</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>
Modified:<br>
    llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp<br>
    llvm/trunk/test/Instrumentation/AddressSanitizer/basic.ll<br>
<br>
Modified: llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=160284&r1=160283&r2=160284&view=diff" target="_blank" class="cremed">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp?rev=160284&r1=160283&r2=160284&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Instrumentation/AddressSanitizer.cpp Mon Jul 16 11:15:40 2012<br>
@@ -72,6 +72,9 @@<br>
 static const int kAsanStackRightRedzoneMagic = 0xf3;<br>
 static const int kAsanStackPartialRedzoneMagic = 0xf4;<br>
<br>
+// Accesses sizes are powers of two: 1, 2, 4, 8, 16.<br>
+static const size_t kNumberOfAccessSizes = 5;<br>
+<br>
 // Command-line flags.<br>
<br>
 // This flag may need to be replaced with -f[no-]asan-reads.<br>
@@ -82,7 +85,10 @@<br>
 static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",<br>
        cl::desc("instrument atomic instructions (rmw, cmpxchg)"),<br>
        cl::Hidden, cl::init(true));<br>
-// This flags limits the number of instructions to be instrumented<br>
+static cl::opt<bool> ClMergeCallbacks("asan-merge-callbacks",<br>
+       cl::desc("merge __asan_report_ callbacks to create fewer BBs"),<br>
+       cl::Hidden, cl::init(false));<br>
+// This flag limits the number of instructions to be instrumented<br>
 // in any given BB. Normally, this should be set to unlimited (INT_MAX),<br>
 // but due to <a href="http://llvm.org/bugs/show_bug.cgi?id=12652" target="_blank" class="cremed">http://llvm.org/bugs/show_bug.cgi?id=12652</a> we temporary<br>
 // set it to 10000.<br>
@@ -138,18 +144,33 @@<br>
<br>
 namespace {<br>
<br>
+/// An object of this type is created while instrumenting every function.<br>
+struct AsanFunctionContext {<br>
+  AsanFunctionContext() {<br>
+    memset(this, 0, sizeof(*this));<br></blockquote><div><br></div><div>This seems like a hack... Is there some reason it was necessary? That is, why not:</div><div><br></div><div>AsanFunctionContext() : CrashBlock() {}</div>
<div><br></div><div>Which should do the same thing?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+  }<br>
+<br>
+  // These are initially zero. If we require at least one call to<br>
+  // __asan_report_{read,write}{1,2,4,8,16}, an appropriate BB is created.<br>
+  BasicBlock *CrashBlock[2][kNumberOfAccessSizes];<br>
+};<br>
+<br>
 /// AddressSanitizer: instrument the code in module to find memory bugs.<br>
 struct AddressSanitizer : public ModulePass {<br>
   AddressSanitizer();<br>
   virtual const char *getPassName() const;<br>
-  void instrumentMop(Instruction *I);<br>
-  void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,<br>
+  void instrumentMop(AsanFunctionContext &AFC, Instruction *I);<br>
+  void instrumentAddress(AsanFunctionContext &AFC,<br>
+                         Instruction *OrigIns, IRBuilder<> &IRB,<br>
                          Value *Addr, uint32_t TypeSize, bool IsWrite);<br>
-  Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr,<br>
+  Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,<br>
+                           Value *ShadowValue, uint32_t TypeSize);<br>
+  Instruction *generateCrashCode(BasicBlock *BB, Value *Addr,<br>
                                  bool IsWrite, uint32_t TypeSize);<br>
-  bool instrumentMemIntrinsic(MemIntrinsic *MI);<br>
-  void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,<br>
-                                  Value *Size,<br>
+  bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);<br>
+  void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,<br>
+                                   Instruction *OrigIns, Value *Addr,<br>
+                                   Value *Size,<br></blockquote><div><br></div><div>All of this churn makes me think that this pass should really be a function pass rather than a module pass, potentially depending on a module pass running first. Would such a split make sense? Doesn't have to happen now, but I think it would make the organization of the instrumentation passes much more clear.</div>
<div><br></div><div>Eventually, it will also have a performance impact -- there is some hope that we will be able to parallelize function passes. Anyways, that's a long-term thing.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

                                    Instruction *InsertBefore, bool IsWrite);<br>
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);<br>
   bool handleFunction(Module &M, Function &F);<br>
@@ -192,11 +213,10 @@<br>
   Function *AsanInitFunction;<br>
   Instruction *CtorInsertBefore;<br>
   OwningPtr<FunctionBlackList> BL;<br>
-  // Accesses sizes are powers of two: 1, 2, 4, 8, 16.<br>
-  static const size_t kNumberOfAccessSizes = 5;<br>
   // This array is indexed by AccessIsWrite and log2(AccessSize).<br>
   Function *AsanErrorCallback[2][kNumberOfAccessSizes];<br>
 };<br>
+<br>
 }  // namespace<br>
<br>
 char AddressSanitizer::ID = 0;<br>
@@ -230,19 +250,24 @@<br>
 //     ThenBlock<br>
 //   Tail<br>
 //<br>
-// Returns the ThenBlock's terminator.<br>
-static BranchInst *splitBlockAndInsertIfThen(Value *Cmp) {<br>
+// If ThenBlock is zero, a new block is created and its terminator is returned.<br>
+// Otherwize NULL is returned.<br>
+static BranchInst *splitBlockAndInsertIfThen(Value *Cmp,<br>
+                                             BasicBlock *ThenBlock = 0) {<br>
   Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();<br>
   BasicBlock *Head = SplitBefore->getParent();<br>
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);<br>
   TerminatorInst *HeadOldTerm = Head->getTerminator();<br>
-  LLVMContext &C = Head->getParent()->getParent()->getContext();<br>
-  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent());<br>
+  BranchInst *CheckTerm = NULL;<br></blockquote><div><br></div><div>LLVM style is to use '0' rather than NULL....</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+  if (!ThenBlock) {<br>
+    LLVMContext &C = Head->getParent()->getParent()->getContext();<br></blockquote><div><br></div><div>This should really just be a method, it would simplify stuff such as getting the context.</div><div> </div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+    ThenBlock = BasicBlock::Create(C, "", Head->getParent());<br>
+    CheckTerm = BranchInst::Create(Tail, ThenBlock);<br>
+  }<br>
   BranchInst *HeadNewTerm =<br>
     BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);<br>
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);<br>
<br>
-  BranchInst *CheckTerm = BranchInst::Create(Tail, ThenBlock);<br>
   return CheckTerm;<br>
 }<br>
<br>
@@ -256,12 +281,13 @@<br>
                                                MappingOffset));<br>
 }<br>
<br>
-void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,<br>
+void AddressSanitizer::instrumentMemIntrinsicParam(<br>
+    AsanFunctionContext &AFC, Instruction *OrigIns,<br>
     Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {<br>
   // Check the first byte.<br>
   {<br>
     IRBuilder<> IRB(InsertBefore);<br>
-    instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);<br>
+    instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite);<br>
   }<br>
   // Check the last byte.<br>
   {<br>
@@ -271,12 +297,13 @@<br>
     SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);<br>
     Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);<br>
     Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);<br>
-    instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);<br>
+    instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);<br>
   }<br>
 }<br>
<br>
 // Instrument memset/memmove/memcpy<br>
-bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {<br>
+bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,<br>
+                                              MemIntrinsic *MI) {<br>
   Value *Dst = MI->getDest();<br>
   MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);<br>
   Value *Src = MemTran ? MemTran->getSource() : NULL;<br>
@@ -295,9 +322,9 @@<br>
     InsertBefore = splitBlockAndInsertIfThen(Cmp);<br>
   }<br>
<br>
-  instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);<br>
+  instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);<br>
   if (Src)<br>
-    instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);<br>
+    instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);<br>
   return true;<br>
 }<br>
<br>
@@ -327,7 +354,7 @@<br>
   return NULL;<br>
 }<br>
<br>
-void AddressSanitizer::instrumentMop(Instruction *I) {<br>
+void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {<br>
   bool IsWrite;<br>
   Value *Addr = isInterestingMemoryAccess(I, &IsWrite);<br>
   assert(Addr);<br>
@@ -348,7 +375,7 @@<br>
   }<br>
<br>
   IRBuilder<> IRB(I);<br>
-  instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);<br>
+  instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite);<br>
 }<br>
<br>
 // Validate the result of Module::getOrInsertFunction called for an interface<br>
@@ -363,7 +390,8 @@<br>
 }<br>
<br>
 Instruction *AddressSanitizer::generateCrashCode(<br>
-    IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) {<br>
+    BasicBlock *BB, Value *Addr, bool IsWrite, uint32_t TypeSize) {<br>
+  IRBuilder<> IRB(BB->getFirstNonPHI());<br>
   size_t AccessSizeIndex = CountTrailingZeros_32(TypeSize / 8);<br>
   assert(AccessSizeIndex < kNumberOfAccessSizes);<br>
   CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],<br>
@@ -372,7 +400,26 @@<br>
   return Call;<br>
 }<br>
<br>
-void AddressSanitizer::instrumentAddress(Instruction *OrigIns,<br>
+Value * AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,<br></blockquote><div><br></div><div>No ' ' on both sides of the '*' here: Value *AddressSanitizer::...</div><div> </div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+                                            Value *ShadowValue,<br>
+                                            uint32_t TypeSize) {<br>
+  size_t Granularity = 1 << MappingScale;<br>
+  // Addr & (Granularity - 1)<br>
+  Value *LastAccessedByte = IRB.CreateAnd(<br>
+      AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));<br>
+  // (Addr & (Granularity - 1)) + size - 1<br>
+  if (TypeSize / 8 > 1)<br>
+    LastAccessedByte = IRB.CreateAdd(<br>
+        LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));<br>
+  // (uint8_t) ((Addr & (Granularity-1)) + size - 1)<br>
+  LastAccessedByte = IRB.CreateIntCast(<br>
+      LastAccessedByte, IRB.getInt8Ty(), false);<br>
+  // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue<br>
+  return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);<br>
+}<br>
+<br>
+void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,<br>
+                                         Instruction *OrigIns,<br>
                                          IRBuilder<> &IRB, Value *Addr,<br>
                                          uint32_t TypeSize, bool IsWrite) {<br>
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);<br>
@@ -387,31 +434,27 @@<br>
<br>
   Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);<br>
<br>
-  Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp);<br>
-  IRBuilder<> IRB2(CheckTerm);<br>
+  BasicBlock *CrashBlock = NULL;<br>
+  if (ClMergeCallbacks) {<br>
+    llvm_unreachable("unimplemented yet");<br>
+  } else {<br>
+    CrashBlock = BasicBlock::Create(*C, "crash_bb",<br>
+                                    OrigIns->getParent()->getParent());<br>
+    new UnreachableInst(*C, CrashBlock);<br>
+    Instruction *Crash =<br>
+        generateCrashCode(CrashBlock, AddrLong, IsWrite, TypeSize);<br>
+    Crash->setDebugLoc(OrigIns->getDebugLoc());<br>
+  }<br>
<br>
   size_t Granularity = 1 << MappingScale;<br>
   if (TypeSize < 8 * Granularity) {<br>
-    // Addr & (Granularity - 1)<br>
-    Value *LastAccessedByte = IRB2.CreateAnd(<br>
-        AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));<br>
-    // (Addr & (Granularity - 1)) + size - 1<br>
-    if (TypeSize / 8 > 1)<br>
-      LastAccessedByte = IRB2.CreateAdd(<br>
-          LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));<br>
-    // (uint8_t) ((Addr & (Granularity-1)) + size - 1)<br>
-    LastAccessedByte = IRB2.CreateIntCast(<br>
-        LastAccessedByte, IRB.getInt8Ty(), false);<br>
-    // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue<br>
-    Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);<br>
-<br>
-    CheckTerm = splitBlockAndInsertIfThen(Cmp2);<br>
-  }<br>
-<br>
-  IRBuilder<> IRB1(CheckTerm);<br>
-  Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);<br>
-  Crash->setDebugLoc(OrigIns->getDebugLoc());<br>
-  ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));<br>
+    Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp);<br>
+    IRB.SetInsertPoint(CheckTerm);<br>
+    Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);<br>
+    splitBlockAndInsertIfThen(Cmp2, CrashBlock);<br>
+  } else {<br>
+    splitBlockAndInsertIfThen(Cmp, CrashBlock);<br>
+  }<br>
 }<br>
<br>
 // This function replaces all global variables with new variables that have<br>
@@ -733,6 +776,8 @@<br>
     }<br>
   }<br>
<br>
+  AsanFunctionContext AFC;<br>
+<br>
   // Instrument.<br>
   int NumInstrumented = 0;<br>
   for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {<br>
@@ -740,9 +785,9 @@<br>
     if (ClDebugMin < 0 || ClDebugMax < 0 ||<br>
         (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {<br>
       if (isInterestingMemoryAccess(Inst, &IsWrite))<br>
-        instrumentMop(Inst);<br>
+        instrumentMop(AFC, Inst);<br>
       else<br>
-        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));<br>
+        instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst));<br>
     }<br>
     NumInstrumented++;<br>
   }<br>
<br>
Modified: llvm/trunk/test/Instrumentation/AddressSanitizer/basic.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/basic.ll?rev=160284&r1=160283&r2=160284&view=diff" target="_blank" class="cremed">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Instrumentation/AddressSanitizer/basic.ll?rev=160284&r1=160283&r2=160284&view=diff</a><br>

==============================================================================<br>
--- llvm/trunk/test/Instrumentation/AddressSanitizer/basic.ll (original)<br>
+++ llvm/trunk/test/Instrumentation/AddressSanitizer/basic.ll Mon Jul 16 11:15:40 2012<br>
@@ -20,6 +20,10 @@<br>
 ; to the end of the function.<br>
 ; CHECK:   %tmp1 = load i32* %a<br>
 ; CHECK:   ret i32 %tmp1<br>
+<br>
+; The crash block reports the error.<br>
+; CHECK:   call void @__asan_report_load4(i64 %[[LOAD_ADDR]]) noreturn<br>
+; CHECK:   unreachable<br>
 ;<br>
 ; First instrumentation block refines the shadow test.<br>
 ; CHECK:   and i64 %[[LOAD_ADDR]], 7<br>
@@ -28,9 +32,6 @@<br>
 ; CHECK:   icmp sge i8 %{{.*}}, %[[LOAD_SHADOW]]<br>
 ; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}<br>
 ;<br>
-; Final instrumentation block reports the error.<br>
-; CHECK:   call void @__asan_report_load4(i64 %[[LOAD_ADDR]]) noreturn<br>
-; CHECK:   unreachable<br>
<br>
 entry:<br>
   %tmp1 = load i32* %a<br>
@@ -53,6 +54,10 @@<br>
 ; CHECK:   store i32 42, i32* %a<br>
 ; CHECK:   ret void<br>
 ;<br>
+; The crash block reports the error.<br>
+; CHECK:   call void @__asan_report_store4(i64 %[[STORE_ADDR]]) noreturn<br>
+; CHECK:   unreachable<br>
+;<br>
 ; First instrumentation block refines the shadow test.<br>
 ; CHECK:   and i64 %[[STORE_ADDR]], 7<br>
 ; CHECK:   add i64 %{{.*}}, 3<br>
@@ -60,9 +65,6 @@<br>
 ; CHECK:   icmp sge i8 %{{.*}}, %[[STORE_SHADOW]]<br>
 ; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}<br>
 ;<br>
-; Final instrumentation block reports the error.<br>
-; CHECK:   call void @__asan_report_store4(i64 %[[STORE_ADDR]]) noreturn<br>
-; CHECK:   unreachable<br>
<br>
 entry:<br>
   store i32 42, i32* %a<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@cs.uiuc.edu" class="cremed">llvm-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits" target="_blank" class="cremed">http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>