[llvm] 33a92af - [msan] Add off-by-default flag to fix false negatives from partially undefined constant fixed-length vectors (#143837)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 20 10:11:15 PDT 2025


Author: Thurston Dang
Date: 2025-06-20T10:11:12-07:00
New Revision: 33a92af1b2260506356eb838125b356703bf02bb

URL: https://github.com/llvm/llvm-project/commit/33a92af1b2260506356eb838125b356703bf02bb
DIFF: https://github.com/llvm/llvm-project/commit/33a92af1b2260506356eb838125b356703bf02bb.diff

LOG: [msan] Add off-by-default flag to fix false negatives from partially undefined constant fixed-length vectors (#143837)

This patch adds an off-by-default flag which, when enabled via `-mllvm -msan-poison-undef-vectors=true`, fixes a false negative in MSan (partially-undefined constant fixed-length vectors). It is currently off by default since, by fixing the false positive, code/tests that previously passed MSan may start failing. The default will be changed in a future patch.

Prior to this patch, MSan computes that partially-undefined constant fixed-length vectors are fully initialized, which leads to false negatives; moreover, benign vector rewriting could theoretically flip MSan's shadow computation from initialized to uninitialized or vice-versa (*). `-msan-poison-undef-vectors=true` calculates the shadow precisely: for each element of the vector, the corresponding shadow is fully uninitialized if the element is undefined/poisoned, otherwise it is fully initialized.

Updates the test from https://github.com/llvm/llvm-project/pull/143823

(*) For example:
  ```
  %x = insertelement <2 x i64> <i64 0, i64 poison>, i64 42, i64 0
  %y = insertelement <2 x i64> <i64 poison, i64 poison>, i64 42, i64 0
  ```
%x and %y are equivalent but, prior to this patch, MSan incorrectly computes the shadow of %x as <0, 0> rather than <0, -1>.

Added: 
    

Modified: 
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
    llvm/test/Instrumentation/MemorySanitizer/partial-poison.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fb55bd7bfe567..5aeca60b08d3d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -265,9 +265,22 @@ static cl::opt<bool>
                       cl::desc("Print name of local stack variable"),
                       cl::Hidden, cl::init(true));
 
-static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
-                                   cl::desc("poison undef temps"), cl::Hidden,
-                                   cl::init(true));
+static cl::opt<bool>
+    ClPoisonUndef("msan-poison-undef",
+                  cl::desc("Poison fully undef temporary values. "
+                           "Partially undefined constant vectors "
+                           "are unaffected by this flag (see "
+                           "-msan-poison-undef-vectors)."),
+                  cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClPoisonUndefVectors(
+    "msan-poison-undef-vectors",
+    cl::desc("Precisely poison partially undefined constant vectors. "
+             "If false (legacy behavior), the entire vector is "
+             "considered fully initialized, which may lead to false "
+             "negatives. Fully undefined constant vectors are "
+             "unaffected by this flag (see -msan-poison-undef)."),
+    cl::Hidden, cl::init(false));
 
 static cl::opt<bool>
     ClHandleICmp("msan-handle-icmp",
@@ -1181,6 +1194,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   bool PropagateShadow;
   bool PoisonStack;
   bool PoisonUndef;
+  bool PoisonUndefVectors;
 
   struct ShadowOriginAndInsertPoint {
     Value *Shadow;
@@ -1207,6 +1221,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     PropagateShadow = SanitizeFunction;
     PoisonStack = SanitizeFunction && ClPoisonStack;
     PoisonUndef = SanitizeFunction && ClPoisonUndef;
+    PoisonUndefVectors = SanitizeFunction && ClPoisonUndefVectors;
 
     // In the presence of unreachable blocks, we may see Phi nodes with
     // incoming nodes from such blocks. Since InstVisitor skips unreachable
@@ -1989,6 +2004,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       }
       return Shadow;
     }
+    // Handle fully undefined values
+    // (partially undefined constant vectors are handled later)
     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
       Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
                                                         : getCleanShadow(V);
@@ -2086,8 +2103,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       return ShadowPtr;
     }
 
-    // TODO: Partially undefined vectors are handled by the fall-through case
-    //       below (see partial-poison.ll); this causes false negatives.
+    // Check for partially-undefined constant vectors
+    // TODO: scalable vectors (this is hard because we do not have IRBuilder)
+    if (isa<FixedVectorType>(V->getType()) && isa<Constant>(V) &&
+        cast<Constant>(V)->containsUndefOrPoisonElement() && PropagateShadow &&
+        PoisonUndefVectors) {
+      unsigned NumElems = cast<FixedVectorType>(V->getType())->getNumElements();
+      SmallVector<Constant *, 32> ShadowVector(NumElems);
+      for (unsigned i = 0; i != NumElems; ++i) {
+        Constant *Elem = cast<Constant>(V)->getAggregateElement(i);
+        ShadowVector[i] = isa<UndefValue>(Elem) ? getPoisonedShadow(Elem)
+                                                : getCleanShadow(Elem);
+      }
+
+      Value *ShadowConstant = ConstantVector::get(ShadowVector);
+      LLVM_DEBUG(dbgs() << "Partial undef constant vector: " << *V << " ==> "
+                        << *ShadowConstant << "\n");
+
+      return ShadowConstant;
+    }
+
+    // TODO: partially-undefined constant arrays, structures, and nested types
 
     // For everything else the shadow is zero.
     return getCleanShadow(V);

diff  --git a/llvm/test/Instrumentation/MemorySanitizer/partial-poison.ll b/llvm/test/Instrumentation/MemorySanitizer/partial-poison.ll
index 5164441c17e10..025317a53c8d1 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/partial-poison.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/partial-poison.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -S -passes='msan' 2>&1 | FileCheck %s
+; RUN: opt < %s -S -passes='msan' -msan-poison-undef-vectors=true  2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PRECISE
+; RUN: opt < %s -S -passes='msan' -msan-poison-undef-vectors=false 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-IMPRECISE
 ;
-; Test case to show that MSan computes shadows for partially poisoned vectors
-; as fully initialized, resulting in false negatives.
+; Regression test case for computing shadows of partially poisoned vectors.
+; Partially poisoned structs and arrays are not correctly implemented.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -11,7 +12,8 @@ define <2 x i64> @left_poison(ptr %add.ptr) sanitize_memory {
 ; CHECK-LABEL: define <2 x i64> @left_poison(
 ; CHECK-SAME: ptr [[ADD_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-PRECISE:   store <2 x i64> <i64 -1, i64 0>, ptr @__msan_retval_tls, align 8
+; CHECK-IMPRECISE: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> <i64 poison, i64 42>
 ;
   ret <2 x i64> <i64 poison, i64 42>
@@ -21,7 +23,8 @@ define <2 x i64> @right_poison(ptr %add.ptr) sanitize_memory {
 ; CHECK-LABEL: define <2 x i64> @right_poison(
 ; CHECK-SAME: ptr [[ADD_PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-PRECISE:   store <2 x i64> <i64 0, i64 -1>, ptr @__msan_retval_tls, align 8
+; CHECK-IMPRECISE: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> <i64 42, i64 poison>
 ;
   ret <2 x i64> <i64 42, i64 poison>
@@ -51,7 +54,8 @@ define <2 x i64> @left_undef(ptr %add.ptr) sanitize_memory {
 ; CHECK-LABEL: define <2 x i64> @left_undef(
 ; CHECK-SAME: ptr [[ADD_PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-PRECISE:   store <2 x i64> <i64 -1, i64 0>, ptr @__msan_retval_tls, align 8
+; CHECK-IMPRECISE: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> <i64 undef, i64 42>
 ;
   ret <2 x i64> <i64 undef, i64 42>
@@ -61,7 +65,8 @@ define <2 x i64> @right_undef(ptr %add.ptr) sanitize_memory {
 ; CHECK-LABEL: define <2 x i64> @right_undef(
 ; CHECK-SAME: ptr [[ADD_PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-PRECISE:   store <2 x i64> <i64 0, i64 -1>, ptr @__msan_retval_tls, align 8
+; CHECK-IMPRECISE: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> <i64 42, i64 undef>
 ;
   ret <2 x i64> <i64 42, i64 undef>
@@ -76,3 +81,23 @@ define <2 x i64> @full_undef(ptr %add.ptr) sanitize_memory {
 ;
   ret <2 x i64> <i64 undef, i64 undef>
 }
+
+define {i64, i64}  @struct_left_undef() sanitize_memory {
+; CHECK-LABEL: define { i64, i64 } @struct_left_undef(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store { i64, i64 } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { i64, i64 } { i64 undef, i64 42 }
+;
+  ret {i64, i64} { i64 undef, i64 42 }
+}
+
+define [2x i64]  @array_right_undef() sanitize_memory {
+; CHECK-LABEL: define [2 x i64] @array_right_undef(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store [2 x i64] zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret [2 x i64] [i64 42, i64 undef]
+; 
+  ret [2x i64] [ i64 42, i64 undef ]
+}


        


More information about the llvm-commits mailing list