[compiler-rt] [llvm] [msan] Automatically print shadow for failing outlined checks (PR #145107)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 14:56:48 PDT 2025
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/145107
A commonly used aid for debugging MSan reports is __msan_print_shadow(), which requires manual app code annotations (typically of the variable in the UUM report or nearby). This is in contrast to ASan, which automatically prints out the shadow map when a check fails.
This patch changes MSan to print the shadow that failed an outlined check (checks are outlined per function after -msan-instrumentation-with-call-threshold is exceeded). Note that we do not print out the shadow map of "neighboring" variables because this is technically infeasible; see "Caveat" below.
This patch can be easier to use than __msan_print_shadow() because it does not require manual app code annotations. Additionally, due to optimizations, __msan_print_shadow() calls can sometimes spuriously affect whether a variable is initialized.
As a side effect, this patch also enables outlined checks for arbitrary-sized shadows (vs. the current hardcoded handlers for {1,2,4,8}-byte shadows).
Caveat: the shadow does not necessarily correspond to an individual user variable (MSan instrumentation may combine and/or truncate multiple shadows prior to emitting a check that the mangled shadow is zero; e.g., convertShadowToScalar, handleSSEVectorConvertIntrinsic, materializeInstructionChecks). OTOH, it is arguably a strength that this feature emits the shadow that directly matters for the MSan check, which cannot be obtained using the MSan API.
>From be37467155a3b2617a5f8e69e0b51a60ccff7e4d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 11 Jun 2025 22:11:18 +0000
Subject: [PATCH] [msan] Automatically print shadow for failing outlined checks
A commonly used aid for debugging MSan reports is __msan_print_shadow(),
which must be manually applied (typically to the variable in the UUM
report or nearby). This is in contrast to ASan, which automatically
prints out the shadow map when a check fails.
This patch changes MSan to print the shadow that failed an outlined check (checks are outlined per function after -msan-instrumentation-with-call-threshold is exceeded). As a side effect, this also enables outlined checks for arbitrary-sized shadows (vs. the current hardcoded handlers for {1,2,4,8}-byte shadows). Note that we do not print out the shadow map of "neighboring" variables because this is technically infeasible; see "Caveat" below.
This patch can be easier to use than __msan_print_shadow() because it does not require manual app code annotations. Additionally, due to optimizations, __msan_print_shadow()
calls can sometimes spuriously affect whether a variable is initialized.
Caveat: the shadow does not necessarily correspond to an individual user
variable (MSan instrumentation may combine and/or truncate multiple shadows prior to
emitting a check that the mangled shadow is zero; e.g., convertShadowToScalar,
handleSSEVectorConvertIntrinsic, materializeInstructionChecks). OTOH it is arguably a
strength that this feature emits the shadow that directly matters for the MSan check, which cannot be obtained using the MSan API.
---
compiler-rt/lib/msan/msan.cpp | 45 +++++++++++++++++++
.../lib/msan/msan_interface_internal.h | 2 +
.../msan_print_shadow_on_outlined_check.cpp | 39 ++++++++++++++++
.../Instrumentation/MemorySanitizer.cpp | 42 ++++++++++++-----
4 files changed, 118 insertions(+), 10 deletions(-)
create mode 100644 compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index a3c0c2e485af3..d82881235340b 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -352,11 +352,33 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(
using namespace __msan;
+// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
+// an MSan shadow region.
+static void print_shadow_value(void *shadow, u64 size) {
+ Printf("\n");
+ Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s");
+ for (unsigned int i = 0; i < size; i++) {
+ if (i % 4 == 0)
+ Printf(" ");
+
+ unsigned char x = ((unsigned char *)shadow)[i];
+ Printf("%x%x", x >> 4, x & 0xf);
+ }
+ Printf("\n");
+ Printf(
+ "Caveat: the shadow value does not necessarily directly correspond to a "
+ "single user variable. The correspondence is stronger, but not always "
+ "perfect, when origin tracking is enabled.\n");
+ Printf("\n");
+}
+
#define MSAN_MAYBE_WARNING(type, size) \
void __msan_maybe_warning_##size(type s, u32 o) { \
GET_CALLER_PC_BP; \
+ \
if (UNLIKELY(s)) { \
PrintWarningWithOrigin(pc, bp, o); \
+ print_shadow_value((void *)(&s), sizeof(s)); \
if (__msan::flags()->halt_on_error) { \
Printf("Exiting\n"); \
Die(); \
@@ -369,6 +391,29 @@ MSAN_MAYBE_WARNING(u16, 2)
MSAN_MAYBE_WARNING(u32, 4)
MSAN_MAYBE_WARNING(u64, 8)
+// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into
+// an MSan shadow region.
+void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) {
+ GET_CALLER_PC_BP;
+
+ bool allZero = true;
+ for (unsigned int i = 0; i < size; i++) {
+ if (((char *)shadow)[i]) {
+ allZero = false;
+ break;
+ }
+ }
+
+ if (UNLIKELY(!allZero)) {
+ PrintWarningWithOrigin(pc, bp, o);
+ print_shadow_value(shadow, size);
+ if (__msan::flags()->halt_on_error) {
+ Printf("Exiting\n");
+ Die();
+ }
+ }
+}
+
#define MSAN_MAYBE_STORE_ORIGIN(type, size) \
void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \
if (UNLIKELY(s)) { \
diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h
index c2eead13c20cf..75425b98166a9 100644
--- a/compiler-rt/lib/msan/msan_interface_internal.h
+++ b/compiler-rt/lib/msan/msan_interface_internal.h
@@ -60,6 +60,8 @@ SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_4(u32 s, u32 o);
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_warning_8(u64 s, u32 o);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_maybe_warning_N(void *shadow, u64 size, u32 o);
SANITIZER_INTERFACE_ATTRIBUTE
void __msan_maybe_store_origin_1(u8 s, void *p, u32 o);
diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
new file mode 100644
index 0000000000000..a087c1d8a9053
--- /dev/null
+++ b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp
@@ -0,0 +1,39 @@
+// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm -msan-instrumentation-with-call-threshold=0 -g %s -o %t \
+// RUN: && not %run %t 2>&1 | FileCheck %s
+
+#include <ctype.h>
+#include <stdio.h>
+
+#include <sanitizer/msan_interface.h>
+
+int main(int argc, char *argv[]) {
+ long double a;
+ printf("a: %Lf\n", a);
+ // CHECK: Shadow value (16 bytes): ffffffff ffffffff ffff0000 00000000
+
+ unsigned long long b;
+ printf("b: %llu\n", b);
+ // CHECK: Shadow value (8 bytes): ffffffff ffffffff
+
+ char *p = (char *)(&b);
+ p[2] = 36;
+ printf("b: %lld\n", b);
+ // CHECK: Shadow value (8 bytes): ffff00ff ffffffff
+
+ b = b << 8;
+ printf("b: %lld\n", b);
+ __msan_print_shadow(&b, sizeof(b));
+ // CHECK: Shadow value (8 bytes): 00ffff00 ffffffff
+
+ unsigned int c;
+ printf("c: %u\n", c);
+ // CHECK: Shadow value (4 bytes): ffffffff
+
+ // Converted to boolean
+ if (c) {
+ // CHECK: Shadow value (1 byte): 01
+ printf("Hello\n");
+ }
+
+ return 0;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index c2315d5de7041..1fbeebc49e149 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -652,6 +652,7 @@ class MemorySanitizer {
// These arrays are indexed by log2(AccessSize).
FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+ FunctionCallee MaybeWarningVarSizeFn;
FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
/// Run-time helper that generates a new origin value for a stack
@@ -926,7 +927,9 @@ void MemorySanitizer::createUserspaceApi(Module &M,
MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
-
+ MaybeWarningVarSizeFn = M.getOrInsertFunction(
+ "__msan_maybe_warning_N", TLI.getAttrList(C, {}, /*Signed=*/false),
+ IRB.getVoidTy(), PtrTy, IRB.getInt64Ty(), IRB.getInt32Ty());
FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
@@ -1233,7 +1236,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Constants likely will be eliminated by follow-up passes.
if (isa<Constant>(V))
return false;
-
++SplittableBlocksCount;
return ClInstrumentationWithCallThreshold >= 0 &&
SplittableBlocksCount > ClInstrumentationWithCallThreshold;
@@ -1432,18 +1434,38 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getDataLayout();
TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
- if (instrumentWithCalls(ConvertedShadow) &&
- SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
- FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+ if (instrumentWithCalls(ConvertedShadow) && !MS.CompileKernel) {
// ZExt cannot convert between vector and scalar
ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
- CallBase *CB = IRB.CreateCall(
- Fn, {ConvertedShadow2,
- MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
- CB->addParamAttr(0, Attribute::ZExt);
- CB->addParamAttr(1, Attribute::ZExt);
+
+ if (SizeIndex < kNumberOfAccessSizes) {
+ FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+ CallBase *CB = IRB.CreateCall(
+ Fn,
+ {ConvertedShadow2,
+ MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
+ CB->addParamAttr(0, Attribute::ZExt);
+ CB->addParamAttr(1, Attribute::ZExt);
+ } else {
+ FunctionCallee Fn = MS.MaybeWarningVarSizeFn;
+
+ // Note: we can only dump the current shadow value, not an entire
+ // neighborhood shadow map (as ASan does). This is because the shadow
+ // value does not necessarily correspond to a user variable: MSan code
+ // often combines shadows (e.g., convertShadowToScalar,
+ // handleSSEVectorConvertIntrinsic, materializeInstructionChecks).
+ Value *ShadowAlloca = IRB.CreateAlloca(ConvertedShadow2->getType(), 0u);
+ IRB.CreateStore(ConvertedShadow2, ShadowAlloca);
+ unsigned ShadowSize = DL.getTypeAllocSize(ConvertedShadow2->getType());
+ CallBase *CB = IRB.CreateCall(
+ Fn,
+ {ShadowAlloca, ConstantInt::get(IRB.getInt64Ty(), ShadowSize),
+ MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
+ CB->addParamAttr(1, Attribute::ZExt);
+ CB->addParamAttr(2, Attribute::ZExt);
+ }
} else {
Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
More information about the llvm-commits
mailing list