[llvm] [msan] Add experimental '-msan-or-shadow-for-strict-instructions' flag to pessimize output (PR #128036)

Thurston Dang via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 10:09:01 PST 2025


https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/128036

>From 77ed1c8c4628b2a18ad41c3e93eb040cf6cf961f Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 20 Feb 2025 00:17:00 +0000
Subject: [PATCH 1/3] [msan] Add experimental '-msan-or-shadow-for-unknown'
 flag to pessimize output

visitInstruction currently checks that each parameter is fully
initialized, and then propagates a constant zero (fully initialized) shadow. This patch introduces
an experimental flag, '-msan-or-shadow-for-unknown', which will always
compute the shadow (with OR of the shadow of each parameter) instead of
assuming a constant zero shadow.

This has two effects:
- for non-recover mode: the shadow will now be properly propagated
  instead of being reset to clean after each unknown instruction.
- for recover mode: this is conceptually a no-op (if the shadow check
  passes, the shadow must be zero), but it pessimizes the compiler,
  which sometimes makes it easier for MSan to detect bugs.
---
 .../Instrumentation/MemorySanitizer.cpp       | 38 ++++++++++---
 .../or-shadow-for-strict-instructions.ll      | 54 +++++++++++++++++++
 2 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 8708489ac4fef..734330b6018b0 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -343,10 +343,20 @@ static cl::opt<bool>
                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
                     cl::init(false));
 
-static cl::opt<bool>
-    ClCheckConstantShadow("msan-check-constant-shadow",
-                          cl::desc("Insert checks for constant shadow values"),
-                          cl::Hidden, cl::init(true));
+static cl::opt<bool> ClCheckConstantShadow(
+    "msan-check-constant-shadow",
+    cl::desc("Insert checks for constant non-zero shadow values. "
+             "N.B. constant zero (fully initialized) shadow "
+             "values are never checked."),
+    cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOrShadowForStrictInstructions(
+    "msan-or-shadow-for-strict-instructions",
+    cl::desc("[EXPERIMENTAL] For instructions with default strict semantics, "
+             "propagate the shadow by OR'ing the shadows of the parameters, "
+             "instead of propagating a clean shadow. The strict shadow check "
+             "is still applied beforehand to all the sized parameters."),
+    cl::Hidden, cl::init(false));
 
 // This is off by default because of a bug in gold:
 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
@@ -5596,13 +5606,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ClDumpStrictInstructions)
       dumpInst(I);
     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+
+    bool allSized = true;
     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
       Value *Operand = I.getOperand(i);
       if (Operand->getType()->isSized())
         insertShadowCheck(Operand, &I);
+      else
+        allSized = false;
+    }
+
+    Type *RetTy = cast<Value>(I).getType();
+    if (ClOrShadowForStrictInstructions && allSized && !RetTy->isVoidTy()) {
+      // - In recover mode: the shadow will be computed instead of reset to
+      //   clean.
+      // - In non-recover mode: this is conceptually a no-op (the preceding
+      //   insertShadowCheck(s) imply the shadow must be clean), but it
+      //   sometimes pessimizes the compiler and makes it easier for MSan to
+      //   detect bugs.
+      handleShadowOr(I);
+    } else {
+      setShadow(&I, getCleanShadow(&I));
+      setOrigin(&I, getCleanOrigin());
     }
-    setShadow(&I, getCleanShadow(&I));
-    setOrigin(&I, getCleanOrigin());
   }
 };
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
new file mode 100644
index 0000000000000..1ed9fdb54185f
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=msan -S                                         | FileCheck %s --check-prefixes=CLEAN
+; RUN: opt < %s -passes=msan -S -msan-or-shadow-for-strict-instructions | FileCheck %s --check-prefixes=OR-SHADOW
+;
+; This tests the behavior of the experimental "-msan-or-shadow-for-strict-instructions"
+; flag, which pessimizes the output by calculating the shadow of the return
+; value, even though it should be zero after passing the shadow check.
+;
+; This currently uses 'vcvtfxu2fp' as the "unknown" instruction; this test case
+; will need to be manually updated if that instruction becomes handled
+; properly (not by 'visitInstruction').
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind #0 {
+; CLEAN-LABEL: define <2 x float> @ucvtf_2sc(
+; CLEAN-SAME: <2 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CLEAN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CLEAN-NEXT:    call void @llvm.donothing()
+; CLEAN-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CLEAN-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CLEAN-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; CLEAN:       3:
+; CLEAN-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
+; CLEAN-NEXT:    unreachable
+; CLEAN:       4:
+; CLEAN-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
+; CLEAN-NEXT:    store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CLEAN-NEXT:    ret <2 x float> [[TMPVAR3]]
+;
+; OR-SHADOW-LABEL: define <2 x float> @ucvtf_2sc(
+; OR-SHADOW-SAME: <2 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; OR-SHADOW-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; OR-SHADOW-NEXT:    call void @llvm.donothing()
+; OR-SHADOW-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP1]], zeroinitializer
+; OR-SHADOW-NEXT:    [[_MSPROP1:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer
+; OR-SHADOW-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; OR-SHADOW-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; OR-SHADOW-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; OR-SHADOW:       3:
+; OR-SHADOW-NEXT:    call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
+; OR-SHADOW-NEXT:    unreachable
+; OR-SHADOW:       4:
+; OR-SHADOW-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
+; OR-SHADOW-NEXT:    store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; OR-SHADOW-NEXT:    ret <2 x float> [[TMPVAR3]]
+  %tmpvar3 = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
+  ret <2 x float> %tmpvar3
+}
+
+declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+attributes #0 = { sanitize_memory }

>From 3caa35f60abab45f2535655d4b35d634377266a3 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 20 Feb 2025 18:07:44 +0000
Subject: [PATCH 2/3] Add recover mode test output

---
 .../or-shadow-for-strict-instructions.ll      | 48 ++++++++++++++++++-
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
index 1ed9fdb54185f..dd0ab7903882b 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -passes=msan -S                                         | FileCheck %s --check-prefixes=CLEAN
-; RUN: opt < %s -passes=msan -S -msan-or-shadow-for-strict-instructions | FileCheck %s --check-prefixes=OR-SHADOW
+; RUN: opt < %s '-passes=msan'          -S                                         | FileCheck %s --check-prefixes=CLEAN
+; RUN: opt < %s '-passes=msan'          -S -msan-or-shadow-for-strict-instructions | FileCheck %s --check-prefixes=OR-SHADOW
+; RUN: opt < %s '-passes=msan<recover>' -S                                         | FileCheck %s --check-prefixes=RECOVER-CLEAN
+; RUN: opt < %s '-passes=msan<recover>' -S -msan-or-shadow-for-strict-instructions | FileCheck %s --check-prefixes=RECOVER-OR-SHADOW
 ;
 ; This tests the behavior of the experimental "-msan-or-shadow-for-strict-instructions"
 ; flag, which pessimizes the output by calculating the shadow of the return
@@ -45,6 +47,39 @@ define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind #0 {
 ; OR-SHADOW-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
 ; OR-SHADOW-NEXT:    store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; OR-SHADOW-NEXT:    ret <2 x float> [[TMPVAR3]]
+;
+; RECOVER-CLEAN-LABEL: define <2 x float> @ucvtf_2sc(
+; RECOVER-CLEAN-SAME: <2 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; RECOVER-CLEAN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; RECOVER-CLEAN-NEXT:    call void @llvm.donothing()
+; RECOVER-CLEAN-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; RECOVER-CLEAN-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; RECOVER-CLEAN-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; RECOVER-CLEAN:       3:
+; RECOVER-CLEAN-NEXT:    call void @__msan_warning() #[[ATTR3:[0-9]+]]
+; RECOVER-CLEAN-NEXT:    br label [[TMP4]]
+; RECOVER-CLEAN:       4:
+; RECOVER-CLEAN-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
+; RECOVER-CLEAN-NEXT:    store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; RECOVER-CLEAN-NEXT:    ret <2 x float> [[TMPVAR3]]
+;
+; RECOVER-OR-SHADOW-LABEL: define <2 x float> @ucvtf_2sc(
+; RECOVER-OR-SHADOW-SAME: <2 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; RECOVER-OR-SHADOW-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; RECOVER-OR-SHADOW-NEXT:    call void @llvm.donothing()
+; RECOVER-OR-SHADOW-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP1]], zeroinitializer
+; RECOVER-OR-SHADOW-NEXT:    [[_MSPROP1:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer
+; RECOVER-OR-SHADOW-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; RECOVER-OR-SHADOW-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; RECOVER-OR-SHADOW-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
+; RECOVER-OR-SHADOW:       3:
+; RECOVER-OR-SHADOW-NEXT:    call void @__msan_warning() #[[ATTR3:[0-9]+]]
+; RECOVER-OR-SHADOW-NEXT:    br label [[TMP4]]
+; RECOVER-OR-SHADOW:       4:
+; RECOVER-OR-SHADOW-NEXT:    [[TMPVAR3:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[A]], i32 1)
+; RECOVER-OR-SHADOW-NEXT:    store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; RECOVER-OR-SHADOW-NEXT:    ret <2 x float> [[TMPVAR3]]
+;
   %tmpvar3 = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %A, i32 1)
   ret <2 x float> %tmpvar3
 }
@@ -52,3 +87,12 @@ define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind #0 {
 declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
 
 attributes #0 = { sanitize_memory }
+;.
+; CLEAN: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
+; OR-SHADOW: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
+; RECOVER-CLEAN: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
+; RECOVER-OR-SHADOW: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.

>From b628f27d765084512702a7984ee410ea777dc29f Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 20 Feb 2025 18:08:42 +0000
Subject: [PATCH 3/3] Remove branch weight assertions

---
 .../AArch64/or-shadow-for-strict-instructions.ll         | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
index dd0ab7903882b..9131e50de3a4c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/or-shadow-for-strict-instructions.ll
@@ -87,12 +87,3 @@ define <2 x float> @ucvtf_2sc(<2 x i32> %A) nounwind #0 {
 declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
 
 attributes #0 = { sanitize_memory }
-;.
-; CLEAN: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.
-; OR-SHADOW: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.
-; RECOVER-CLEAN: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.
-; RECOVER-OR-SHADOW: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.



More information about the llvm-commits mailing list