[compiler-rt] [llvm] [msan] Automatically print shadow for failing outlined checks (PR #145107)

Thurston Dang via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 17:02:16 PDT 2025


https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/145107

>From be37467155a3b2617a5f8e69e0b51a60ccff7e4d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 11 Jun 2025 22:11:18 +0000
Subject: [PATCH 1/5] [msan] Automatically print shadow for failing outlined
 checks

A commonly used aid for debugging MSan reports is __msan_print_shadow(),
which must be manually applied (typically to the variable in the UUM
report or nearby). This is in contrast to ASan, which automatically
prints out the shadow map when a check fails.

This patch changes MSan to print the shadow that failed an outlined check (checks are outlined per function after -msan-instrumentation-with-call-threshold is exceeded). As a side effect, this also enables outlined checks for arbitrary-sized shadows (vs. the current hardcoded handlers for {1,2,4,8}-byte shadows). Note that we do not print out the shadow map of "neighboring" variables because this is technically infeasible; see "Caveat" below.

This patch can be easier to use than __msan_print_shadow() because this does not require manual app code annotations. Additionally, due to optimizations, __msan_print_shadow()
calls can sometimes spuriously affect whether a variable is initialized.

Caveat: the shadow does not necessarily correspond to an individual user
variable (MSan instrumentation may combine and/or truncate multiple shadows prior to
emitting a check that the mangled shadow is zero; e.g., convertShadowToScalar,
handleSSEVectorConvertIntrinsic, materializeInstructionChecks). OTOH it is arguably a
strength that this feature emit the shadow that directly matters for the MSan check, but which cannot be obtained using the MSan API.
---
 compiler-rt/lib/msan/msan.cpp                 | 45 +++++++++++++++++++
 .../lib/msan/msan_interface_internal.h        |  2 +
 .../msan_print_shadow_on_outlined_check.cpp   | 39 ++++++++++++++++
 .../Instrumentation/MemorySanitizer.cpp       | 42 ++++++++++++-----
 4 files changed, 118 insertions(+), 10 deletions(-)
 create mode 100644 compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp

diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index a3c0c2e485af3..d82881235340b 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -352,11 +352,33 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(
 
 using namespace __msan;
 
+// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
+// an MSan shadow region.
+static void print_shadow_value(void *shadow, u64 size) {
+  Printf("\n");
+  Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s");
+  for (unsigned int i = 0; i < size; i++) {
+    if (i % 4 == 0)
+      Printf(" ");
+
+    unsigned char x = ((unsigned char *)shadow)[i];
+    Printf("%x%x", x >> 4, x & 0xf);
+  }
+  Printf("\n");
+  Printf(
+      "Caveat: the shadow value does not necessarily directly correspond to a "
+      "single user variable. The correspondence is stronger, but not always "
+      "perfect, when origin tracking is enabled.\n");
+  Printf("\n");
+}
+
 #define MSAN_MAYBE_WARNING(type, size)              \
   void __msan_maybe_warning_##size(type s, u32 o) { \
     GET_CALLER_PC_BP;                               \
+                                                    \
     if (UNLIKELY(s)) {                              \
       PrintWarningWithOrigin(pc, bp, o);            \
+      print_shadow_value((void *)(&s), sizeof(s));  \
       if (__msan::flags()->halt_on_error) {         \
         Printf("Exiting\n");                        \
         Die();                                      \
@@ -369,6 +391,29 @@ MSAN_MAYBE_WARNING(u16, 2)
 MSAN_MAYBE_WARNING(u32, 4)
 MSAN_MAYBE_WARNING(u64, 8)
 
+// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
+// an MSan shadow region.
+void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) {
+  GET_CALLER_PC_BP;
+
+  bool allZero = true;
+  for (unsigned int i = 0; i < size; i++) {
+    if (((char *)shadow)[i]) {
+      allZero = false;
+      break;
+    }
+  }
+
+  if (UNLIKELY(!allZero)) {
+    PrintWarningWithOrigin(pc, bp, o);
+    print_shadow_value(shadow, size);
+    if (__msan::flags()->halt_on_error) {
+      Printf("Exiting\n");
+      Die();
+    }
+  }
+}
+
 #define MSAN_MAYBE_STORE_ORIGIN(type, size)                       \
   void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \
     if (UNLIKELY(s)) {                                            \
diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h
index c2eead13c20cf..75425b98166a9 100644
--- a/compiler-rt/lib/msan/msan_interface_internal.h
+++ b/compiler-rt/lib/msan/msan_interface_internal.h
@@ -60,6 +60,8 @@ SANITIZER_INTERFACE_ATTRIBUTE
 void __msan_maybe_warning_4(u32 s, u32 o);
 SANITIZER_INTERFACE_ATTRIBUTE
 void __msan_maybe_warning_8(u64 s, u32 o);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_maybe_warning_N(void *shadow, u64 size, u32 o);
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __msan_maybe_store_origin_1(u8 s, void *p, u32 o);
diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
new file mode 100644
index 0000000000000..a087c1d8a9053
--- /dev/null
+++ b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
@@ -0,0 +1,39 @@
+// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \
+// RUN:   && not %run %t 2>&1 | FileCheck %s
+
+#include <ctype.h>
+#include <stdio.h>
+
+#include <sanitizer/msan_interface.h>
+
+int main(int argc, char *argv[]) {
+  long double a;
+  printf("a: %Lf\n", a);
+  // CHECK: Shadow value (16 bytes): ffffffff ffffffff ffff0000 00000000
+
+  unsigned long long b;
+  printf("b: %llu\n", b);
+  // CHECK: Shadow value (8 bytes): ffffffff ffffffff
+
+  char *p = (char *)(&b);
+  p[2] = 36;
+  printf("b: %lld\n", b);
+  // CHECK: Shadow value (8 bytes): ffff00ff ffffffff
+
+  b = b << 8;
+  printf("b: %lld\n", b);
+  __msan_print_shadow(&b, sizeof(b));
+  // CHECK: Shadow value (8 bytes): 00ffff00 ffffffff
+
+  unsigned int c;
+  printf("c: %u\n", c);
+  // CHECK: Shadow value (4 bytes): ffffffff
+
+  // Converted to boolean
+  if (c) {
+    // CHECK: Shadow value (1 byte): 01
+    printf("Hello\n");
+  }
+
+  return 0;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c2315d5de7041..1fbeebc49e149 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -652,6 +652,7 @@ class MemorySanitizer {
 
   // These arrays are indexed by log2(AccessSize).
   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+  FunctionCallee MaybeWarningVarSizeFn;
   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
 
   /// Run-time helper that generates a new origin value for a stack
@@ -926,7 +927,9 @@ void MemorySanitizer::createUserspaceApi(Module &M,
     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
         FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
-
+    MaybeWarningVarSizeFn = M.getOrInsertFunction(
+        "__msan_maybe_warning_N", TLI.getAttrList(C, {}, /*Signed=*/false),
+        IRB.getVoidTy(), PtrTy, IRB.getInt64Ty(), IRB.getInt32Ty());
     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
         FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
@@ -1233,7 +1236,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // Constants likely will be eliminated by follow-up passes.
     if (isa<Constant>(V))
       return false;
-
     ++SplittableBlocksCount;
     return ClInstrumentationWithCallThreshold >= 0 &&
            SplittableBlocksCount > ClInstrumentationWithCallThreshold;
@@ -1432,18 +1434,38 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     const DataLayout &DL = F.getDataLayout();
     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
-    if (instrumentWithCalls(ConvertedShadow) &&
-        SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
-      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+    if (instrumentWithCalls(ConvertedShadow) && !MS.CompileKernel) {
       // ZExt cannot convert between vector and scalar
       ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
       Value *ConvertedShadow2 =
           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
-      CallBase *CB = IRB.CreateCall(
-          Fn, {ConvertedShadow2,
-               MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
-      CB->addParamAttr(0, Attribute::ZExt);
-      CB->addParamAttr(1, Attribute::ZExt);
+
+      if (SizeIndex < kNumberOfAccessSizes) {
+        FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+        CallBase *CB = IRB.CreateCall(
+            Fn,
+            {ConvertedShadow2,
+             MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
+        CB->addParamAttr(0, Attribute::ZExt);
+        CB->addParamAttr(1, Attribute::ZExt);
+      } else {
+        FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
+
+        // Note: we can only dump the current shadow value, not an entire
+        // neighborhood shadow map (as ASan does). This is because the shadow
+        // value does not necessarily correspond to a user variable: MSan code
+        // often combines shadows (e.g., convertShadowToScalar,
+        // handleSSEVectorConvertIntrinsic, materializeInstructionChecks).
+        Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u);
+        IRB.CreateStore(ConvertedShadow2, ShadowAlloca);
+        unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType());
+        CallBase *CB = IRB.CreateCall(
+            Fn,
+            {ShadowAlloca, ConstantInt::get(IRB.getInt64Ty(), ShadowSize),
+             MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
+        CB->addParamAttr(1, Attribute::ZExt);
+        CB->addParamAttr(2, Attribute::ZExt);
+      }
     } else {
       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
       Instruction *CheckTerm = SplitBlockAndInsertIfThen(

>From a856b74110679d50c3d0d91f2a1278b78b7146cf Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 20 Jun 2025 23:01:41 +0000
Subject: [PATCH 2/5] Require verbosity >= 1

---
 compiler-rt/lib/msan/msan.cpp                 | 28 ++++++++++---------
 .../msan_print_shadow_on_outlined_check.cpp   |  2 +-
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index d82881235340b..6f58dfad32bbb 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -372,18 +372,19 @@ static void print_shadow_value(void *shadow, u64 size) {
   Printf("\n");
 }
 
-#define MSAN_MAYBE_WARNING(type, size)              \
-  void __msan_maybe_warning_##size(type s, u32 o) { \
-    GET_CALLER_PC_BP;                               \
-                                                    \
-    if (UNLIKELY(s)) {                              \
-      PrintWarningWithOrigin(pc, bp, o);            \
-      print_shadow_value((void *)(&s), sizeof(s));  \
-      if (__msan::flags()->halt_on_error) {         \
-        Printf("Exiting\n");                        \
-        Die();                                      \
-      }                                             \
-    }                                               \
+#define MSAN_MAYBE_WARNING(type, size)               \
+  void __msan_maybe_warning_##size(type s, u32 o) {  \
+    GET_CALLER_PC_BP;                                \
+                                                     \
+    if (UNLIKELY(s)) {                               \
+      PrintWarningWithOrigin(pc, bp, o);             \
+      if (Verbosity() >= 1)                          \
+        print_shadow_value((void *)(&s), sizeof(s)); \
+      if (__msan::flags()->halt_on_error) {          \
+        Printf("Exiting\n");                         \
+        Die();                                       \
+      }                                              \
+    }                                                \
   }
 
 MSAN_MAYBE_WARNING(u8, 1)
@@ -406,7 +407,8 @@ void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) {
 
   if (UNLIKELY(!allZero)) {
     PrintWarningWithOrigin(pc, bp, o);
-    print_shadow_value(shadow, size);
+    if (Verbosity() >= 1)
+      print_shadow_value(shadow, size);
     if (__msan::flags()->halt_on_error) {
       Printf("Exiting\n");
       Die();
diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
index a087c1d8a9053..3ecb1277f23cc 100644
--- a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
+++ b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
@@ -1,5 +1,5 @@
 // RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \
-// RUN:   && not %run %t 2>&1 | FileCheck %s
+// RUN:   && not env MSAN_OPTIONS=verbosity=1 %run %t 2>&1 | FileCheck %s
 
 #include <ctype.h>
 #include <stdio.h>

>From 338bccd3d82a344f00d8e05ac79baf66b99c41af Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 20 Jun 2025 23:11:21 +0000
Subject: [PATCH 3/5] Move before PrintWarningWithOrigin

---
 compiler-rt/lib/msan/msan.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index 6f58dfad32bbb..67879e37fad5e 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -355,7 +355,6 @@ using namespace __msan;
 // N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
 // an MSan shadow region.
 static void print_shadow_value(void *shadow, u64 size) {
-  Printf("\n");
   Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s");
   for (unsigned int i = 0; i < size; i++) {
     if (i % 4 == 0)
@@ -377,9 +376,9 @@ static void print_shadow_value(void *shadow, u64 size) {
     GET_CALLER_PC_BP;                                \
                                                      \
     if (UNLIKELY(s)) {                               \
-      PrintWarningWithOrigin(pc, bp, o);             \
       if (Verbosity() >= 1)                          \
         print_shadow_value((void *)(&s), sizeof(s)); \
+      PrintWarningWithOrigin(pc, bp, o);             \
       if (__msan::flags()->halt_on_error) {          \
         Printf("Exiting\n");                         \
         Die();                                       \
@@ -406,9 +405,9 @@ void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) {
   }
 
   if (UNLIKELY(!allZero)) {
-    PrintWarningWithOrigin(pc, bp, o);
     if (Verbosity() >= 1)
       print_shadow_value(shadow, size);
+    PrintWarningWithOrigin(pc, bp, o);
     if (__msan::flags()->halt_on_error) {
       Printf("Exiting\n");
       Die();

>From 789ec92b2168ecf9e5514f2c49a069f0126eeee5 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 20 Jun 2025 23:38:24 +0000
Subject: [PATCH 4/5] Remove note

---
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 1fbeebc49e149..4e4be4c6dd47e 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1450,12 +1450,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         CB->addParamAttr(1, Attribute::ZExt);
       } else {
         FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
-
-        // Note: we can only dump the current shadow value, not an entire
-        // neighborhood shadow map (as ASan does). This is because the shadow
-        // value does not necessarily correspond to a user variable: MSan code
-        // often combines shadows (e.g., convertShadowToScalar,
-        // handleSSEVectorConvertIntrinsic, materializeInstructionChecks).
         Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u);
         IRB.CreateStore(ConvertedShadow2, ShadowAlloca);
         unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType());

>From 345ae9f4182d59977249d0b95c0d67960359eb84 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 20 Jun 2025 23:38:42 +0000
Subject: [PATCH 5/5] Update
 llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll

---
 .../MemorySanitizer/with-call-type-size.ll    | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll b/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll
index 0b81e5682062a..3b1ab3364b310 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll
@@ -73,13 +73,30 @@ define <4 x i32> @test64(<4 x i32> %vec, i64 %idx, i32 %x) sanitize_memory {
 ; CHECK:         call void @__msan_maybe_warning_8(i64 zeroext %{{.*}}, i32 zeroext 0)
 ; CHECK:         ret <4 x i32>
 
-; Type size too large => inline check.
+; Type size too large => use variable-size handler.
 define <4 x i32> @test65(<4 x i32> %vec, i65 %idx, i32 %x) sanitize_memory {
   %vec1 = insertelement <4 x i32> %vec, i32 %x, i65 %idx
   ret <4 x i32> %vec1
 }
 ; CHECK-LABEL: @test65(
-; CHECK:         call void @__msan_warning_noreturn
+; CHECK:         %[[A:.*]] = zext i65 %1 to i128
+; CHECK:         call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0)
+; CHECK:         ret <4 x i32>
+
+define <4 x i32> @test128(<4 x i32> %vec, i128 %idx, i32 %x) sanitize_memory {
+  %vec1 = insertelement <4 x i32> %vec, i32 %x, i128 %idx
+  ret <4 x i32> %vec1
+}
+; CHECK-LABEL: @test128(
+; CHECK:         call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 16, i32 zeroext 0)
+; CHECK:         ret <4 x i32>
+
+define <4 x i32> @test256(<4 x i32> %vec, i256 %idx, i32 %x) sanitize_memory {
+  %vec1 = insertelement <4 x i32> %vec, i32 %x, i256 %idx
+  ret <4 x i32> %vec1
+}
+; CHECK-LABEL: @test256(
+; CHECK:         call void @__msan_maybe_warning_N(ptr %{{.*}}, i64 zeroext 32, i32 zeroext 0)
 ; CHECK:         ret <4 x i32>
 
 define <4 x i32> @testUndef(<4 x i32> %vec, i32 %x) sanitize_memory {



More information about the llvm-commits mailing list