[llvm] 1cf5c7f - [msan] Disambiguate warnings debug location

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 8 14:17:26 PDT 2022


Author: Vitaly Buka
Date: 2022-09-08T14:17:07-07:00
New Revision: 1cf5c7fe8ce21b3add80972e6aaeb30a1f61bff9

URL: https://github.com/llvm/llvm-project/commit/1cf5c7fe8ce21b3add80972e6aaeb30a1f61bff9
DIFF: https://github.com/llvm/llvm-project/commit/1cf5c7fe8ce21b3add80972e6aaeb30a1f61bff9.diff

LOG: [msan] Disambiguate warnings debug location

If multiple warnings are created on the same instruction (debug location),
it can be difficult to figure out which input value is the cause.

This patch chains origins just before the warning, using the debug
information of the last origin update.

To avoid inflating the binary unnecessarily, do this only when uncertainty is
high enough: at least 3 warnings per debug location by default. On average it
adds 0.4% to the .text size.

Reviewed By: kda, fmayer

Differential Revision: https://reviews.llvm.org/D133232

Added: 
    llvm/test/Instrumentation/MemorySanitizer/disambiguate-origin.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 66ac1ecd6d6b..8ab0296e7647 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -145,6 +145,7 @@
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
@@ -352,6 +353,12 @@ static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
                                       cl::desc("Define custom MSan OriginBase"),
                                       cl::Hidden, cl::init(0));
 
+static cl::opt<int>
+    ClDisambiguateWarning("msan-disambiguate-warning-threshold",
+                          cl::desc("Define threshold for number of checks per "
+                                   "debug location to force origin update."),
+                          cl::Hidden, cl::init(3));
+
 const char kMsanModuleCtorName[] = "msan.module_ctor";
 const char kMsanInitName[] = "__msan_init";
 
@@ -1105,6 +1112,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         : Shadow(S), Origin(O), OrigIns(I) {}
   };
   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+  DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
   SmallSetVector<AllocaInst *, 16> AllocaSet;
   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
@@ -1145,6 +1153,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
   }
 
+  // Creates a new origin and records the stack trace. In general we can call
+  // this function for any origin manipulation we like. However it will cost
+  // runtime resources. So use this wisely only if it can provide additional
+  // information helpful to a user.
   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
     if (MS.TrackOrigins <= 1)
       return V;
@@ -1261,11 +1273,42 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
+  // Returns true if Debug Location corresponds to multiple warnings.
+  bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
+    if (MS.TrackOrigins < 2)
+      return false;
+
+    if (LazyWarningDebugLocationCount.empty())
+      for (const auto &I : InstrumentationList)
+        ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];
+
+    return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
+  }
+
   /// Helper function to insert a warning at IRB's current insert point.
   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
     if (!Origin)
       Origin = (Value *)IRB.getInt32(0);
     assert(Origin->getType()->isIntegerTy());
+
+    if (shouldDisambiguateWarningLocation(IRB.getCurrentDebugLocation())) {
+      // Try to create additional origin with debug info of the last origin
+      // instruction. It may provide additional information to the user.
+      if (Instruction *OI = dyn_cast_or_null<Instruction>(Origin)) {
+        assert(MS.TrackOrigins);
+        auto NewDebugLoc = OI->getDebugLoc();
+        // Origin update with missing or the same debug location provides no
+        // additional value.
+        if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
+          // Insert update just before the check, so we call runtime only just
+          // before the report.
+          IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
+          IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
+          Origin = updateOrigin(Origin, IRBOrigin);
+        }
+      }
+    }
+
     if (MS.CompileKernel || MS.TrackOrigins)
       IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
     else

diff  --git a/llvm/test/Instrumentation/MemorySanitizer/disambiguate-origin.ll b/llvm/test/Instrumentation/MemorySanitizer/disambiguate-origin.ll
new file mode 100644
index 000000000000..df94ec007dad
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/disambiguate-origin.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -msan-track-origins=2 -msan-eager-checks=1 -S -passes=msan 2>&1 | FileCheck %s --implicit-check-not="call void @__msan_"
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = distinct !DISubprogram(name: "t", scope: !3, file: !3, line: 4, type: !4, spFlags: DISPFlagDefinition, unit: !6)
+!3 = !DIFile(filename: "tmp/noundef.cpp", directory: "/")
+!4 = !DISubroutineType(types: !5)
+!5 = !{}
+!6 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
+!10 = !DILocation(line: 9, column: 0, scope: !2)
+!11 = !DILocation(line: 9, column: 1, scope: !2)
+!12 = !DILocation(line: 9, column: 2, scope: !2)
+!13 = !DILocation(line: 9, column: 3, scope: !2)
+
+declare void @OneArg(i32 noundef %a) nounwind uwtable sanitize_memory;
+declare void @ManyArgs(i32 noundef %a, i32 noundef %b, i32 noundef %c) nounwind uwtable sanitize_memory;
+
+define void @TestOne(i32* noundef %a)  nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @TestOne(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.donothing(), !dbg [[DBG1:![0-9]+]]
+; CHECK-DAG:    [[V:%.*]] = load i32, i32* [[A:%.*]], align 4, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP0:%.*]] = ptrtoint i32* [[A]] to i64, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to i32*, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP3:%.*]] = add i64 [[TMP1]], 17592186044416, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i32*, !dbg [[DBG1]]
+; CHECK-DAG:    [[_MSLD:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG1]]
+; CHECK-DAG:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSLD]], 0, !dbg [[DBG7:![0-9]+]]
+; CHECK:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !dbg [[DBG7]], !prof [[PROF8:![0-9]+]]
+; CHECK:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP5]]) #[[ATTR2:[0-9]+]], !dbg [[DBG7]]
+; CHECK:    unreachable, !dbg [[DBG7]]
+; CHECK:    call void @OneArg(i32 noundef [[V]]), !dbg [[DBG7]]
+; CHECK:    ret void
+;
+entry:
+  %v = load i32, i32* %a, !dbg !11
+  call void @OneArg(i32 noundef %v), !dbg !10
+  ret void
+}
+
+define void @TestMany(i32* noundef %a)  nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @TestMany(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.donothing(), !dbg [[DBG1]]
+; CHECK-DAG:    [[X:%.*]] = load i32, i32* [[A:%.*]], align 4, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP0:%.*]] = ptrtoint i32* [[A]] to i64, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to i32*, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP3:%.*]] = add i64 [[TMP1]], 17592186044416, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i32*, !dbg [[DBG1]]
+; CHECK-DAG:    [[_MSLD:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG1]]
+; CHECK-DAG:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG1]]
+; CHECK-DAG:    [[Y:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG9:![0-9]+]]
+; CHECK-DAG:    [[TMP6:%.*]] = ptrtoint i32* [[A]] to i64, !dbg [[DBG9]]
+; CHECK-DAG:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080, !dbg [[DBG9]]
+; CHECK-DAG:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to i32*, !dbg [[DBG9]]
+; CHECK-DAG:    [[TMP9:%.*]] = add i64 [[TMP7]], 17592186044416, !dbg [[DBG9]]
+; CHECK-DAG:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to i32*, !dbg [[DBG9]]
+; CHECK-DAG:    [[_MSLD1:%.*]] = load i32, i32* [[TMP8]], align 4, !dbg [[DBG9]]
+; CHECK-DAG:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !dbg [[DBG9]]
+; CHECK-DAG:    [[Z:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG10:![0-9]+]]
+; CHECK-DAG:    [[TMP12:%.*]] = ptrtoint i32* [[A]] to i64, !dbg [[DBG10]]
+; CHECK-DAG:    [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080, !dbg [[DBG10]]
+; CHECK-DAG:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to i32*, !dbg [[DBG10]]
+; CHECK-DAG:    [[TMP15:%.*]] = add i64 [[TMP13]], 17592186044416, !dbg [[DBG10]]
+; CHECK-DAG:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to i32*, !dbg [[DBG10]]
+; CHECK-DAG:    [[_MSLD2:%.*]] = load i32, i32* [[TMP14]], align 4, !dbg [[DBG10]]
+; CHECK-DAG:    [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4, !dbg [[DBG10]]
+; CHECK-DAG:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSLD]], 0, !dbg [[DBG7]]
+; CHECK:    br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP20:%.*]], !dbg [[DBG7]], !prof [[PROF8]]
+; CHECK:    [[TMP19:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP5]]), !dbg [[DBG1]]
+; CHECK:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP19]]) #[[ATTR2]], !dbg [[DBG7]]
+; CHECK:    unreachable, !dbg [[DBG7]]
+; CHECK:    [[_MSCMP3:%.*]] = icmp ne i32 [[_MSLD1]], 0, !dbg [[DBG7]]
+; CHECK:    br i1 [[_MSCMP3]], label [[TMP21:%.*]], label [[TMP23:%.*]], !dbg [[DBG7]], !prof [[PROF8]]
+; CHECK:    [[TMP22:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP11]]), !dbg [[DBG9]]
+; CHECK:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP22]]) #[[ATTR2]], !dbg [[DBG7]]
+; CHECK:    unreachable, !dbg [[DBG7]]
+; CHECK:    [[_MSCMP4:%.*]] = icmp ne i32 [[_MSLD2]], 0, !dbg [[DBG7]]
+; CHECK:    br i1 [[_MSCMP4]], label [[TMP24:%.*]], label [[TMP26:%.*]], !dbg [[DBG7]], !prof [[PROF8]]
+; CHECK:    [[TMP25:%.*]] = call i32 @__msan_chain_origin(i32 [[TMP17]]), !dbg [[DBG10]]
+; CHECK:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP25]]) #[[ATTR2]], !dbg [[DBG7]]
+; CHECK:    unreachable, !dbg [[DBG7]]
+; CHECK:    call void @ManyArgs(i32 noundef [[X]], i32 noundef [[Y]], i32 noundef [[Z]]), !dbg [[DBG7]]
+; CHECK:    ret void
+;
+entry:
+  %x = load i32, i32* %a, !dbg !11
+  %y = load i32, i32* %a, !dbg !12
+  %z = load i32, i32* %a, !dbg !13
+  call void @ManyArgs(i32 noundef %x, i32 noundef %y, i32 noundef %z), !dbg !10
+  ret void
+}
+
+; CHECK-LABEL: define internal void @msan.module_ctor()
+; CHECK:         call void @__msan_init()


        


More information about the llvm-commits mailing list