[llvm] [msan] Add experimental '-msan-or-shadow-for-strict-instructions' flag to pessimize output (PR #128036)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 09:43:46 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)

<details>
<summary>Changes</summary>

visitInstruction currently checks that each parameter is fully initialized, and then propagates a constant zero (fully initialized) shadow. This patch introduces an experimental flag, '-msan-or-shadow-for-unknown', which will always compute the shadow (by OR'ing the shadows of each parameter) in visitInstruction where possible.

This has two effects:
- for non-recover mode: the shadow will now be properly propagated instead of being reset to clean after each unknown instruction.
- for recover mode: this is conceptually a no-op (if the input shadow checks pass, the output shadow must be zero), but it pessimizes the compiler, which sometimes makes it easier for MSan to detect bugs.

This patch also does a drive-by fix of the ClCheckConstantShadow description, to note that constant zero shadows are never checked.

---
Full diff: https://github.com/llvm/llvm-project/pull/128036.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+32-6) 
- (added) llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll (+54) 


``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 8708489ac4fef..734330b6018b0 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -343,10 +343,20 @@ static cl::opt<bool>
                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
                     cl::init(false));
 
-static cl::opt<bool>
-    ClCheckConstantShadow("msan-check-constant-shadow",
-                          cl::desc("Insert checks for constant shadow values"),
-                          cl::Hidden, cl::init(true));
+static cl::opt<bool> ClCheckConstantShadow(
+    "msan-check-constant-shadow",
+    cl::desc("Insert checks for constant non-zero shadow values. "
+             "N.B. constant zero (fully initialized) shadow "
+             "values are never checked."),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOrShadowForStrictInstructions(
+    "msan-or-shadow-for-strict-instructions",
+    cl::desc("[EXPERIMENTAL] For instructions with default strict semantics, "
+             "propagate the shadow by OR'ing the shadows of the parameters, "
+             "instead of propagating a clean shadow. The strict shadow check "
+             "is still applied beforehand to all the sized parameters."),
+    cl::Hidden, cl::init(false));
 
 // This is off by default because of a bug in gold:
 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
@@ -5596,13 +5606,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ClDumpStrictInstructions)
       dumpInst(I);
     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+
+    bool allSized = true;
     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
       Value *Operand = I.getOperand(i);
       if (Operand->getType()->isSized())
         insertShadowCheck(Operand, &I);
+      else
+        allSized = false;
+    }
+
+    Type *RetTy = cast<Value>(I).getType();
+    if (ClOrShadowForStrictInstructions && allSized && !RetTy->isVoidTy()) {
+      // - In recover mode: the shadow will be computed instead of reset to
+      //   clean.
+      // - In non-recover mode: this is conceptually a no-op (the preceding
+      //   insertShadowCheck(s) imply the shadow must be clean), but it
+      //   sometimes pessimizes the compiler and makes it easier for MSan to
+      //   detect bugs.
+      handleShadowOr(I);
+    } else {
+      setShadow(&I, getCleanShadow(&I));
+      setOrigin(&I, getCleanOrigin());
     }
-    setShadow(&I, getCleanShadow(&I));
-    setOrigin(&I, getCleanOrigin());
   }
 };
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
new file mode 100644
index 0000000000000..1ed9fdb54185f
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=msan -S                                         | FileCheck %s --check-prefixes=CLEAN
+; RUN: opt < %s -passes=msan -S -msan-or-shadow-for-strict-instructions | FileCheck %s --check-prefixes=OR-SHADOW
+;
+; This tests the behavior of the experimental "-msan-or-shadow-for-strict-instructions"
+; flag, which pessimizes the output by calculating the shadow of the return
+; value, even though it should be zero after passing the shadow check.
+;
+; This currently uses 'vcvtfxu2fp' as the "unknown" instruction; this test case
+; will need to be manually updated if that instruction becomes handled
+; properly (not by 'visitInstruction').
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind #0 {
+; CLEAN-LABEL: define <2 x float> @ucvtf_2sc(
+; CLEAN-SAME: <2 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CLEAN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CLEAN-NEXT:    call void @llvm.donothing()
+; CLEAN-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CLEAN-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CLEAN-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CLEAN:       3:
+; CLEAN-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
+; CLEAN-NEXT:    unreachable
+; CLEAN:       4:
+; CLEAN-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
+; CLEAN-NEXT:    store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CLEAN-NEXT:    ret <2 x float> [[TMPVAR3]]
+;
+; OR-SHADOW-LABEL: define <2 x float> @ucvtf_2sc(
+; OR-SHADOW-SAME: <2 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; OR-SHADOW-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; OR-SHADOW-NEXT:    call void @llvm.donothing()
+; OR-SHADOW-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP1]], zeroinitializer
+; OR-SHADOW-NEXT:    [[_MSPROP1:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer
+; OR-SHADOW-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; OR-SHADOW-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; OR-SHADOW-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; OR-SHADOW:       3:
+; OR-SHADOW-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
+; OR-SHADOW-NEXT:    unreachable
+; OR-SHADOW:       4:
+; OR-SHADOW-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
+; OR-SHADOW-NEXT:    store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; OR-SHADOW-NEXT:    ret <2 x float> [[TMPVAR3]]
+  %tmpvar3 = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
+  ret <2 x float> %tmpvar3
+}
+
+declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+attributes #0 = { sanitize_memory }

``````````

</details>


https://github.com/llvm/llvm-project/pull/128036


More information about the llvm-commits mailing list