[llvm] [msan] Convert vector shadow to scalar before zext (PR #96722)

Wed Jul 3 11:14:31 PDT 2024

https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/96722

>From e6b4094b2a9dc469666c48a302790e903e8b3de7 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 26 Jun 2024 02:48:02 +0000
Subject: [PATCH 1/2]     [msan] Convert vector shadow to scalar before zext

    zext does not allow converting vector shadow to scalar, so we must
    manually convert it prior to calling zext in materializeOneCheck,
    for which the 'convertedShadow' parameter isn't actually guaranteed
    to be scalar.

    In contrast, the storeOrigin function already converts the (potentially vector) shadow to scalar; we add a comment to note that it is load bearing.
---
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index d0dbb108b1eca..c7d41f6298372 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1283,6 +1283,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     const DataLayout &DL = F.getDataLayout();
     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
     TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
+    // ZExt cannot convert between vector and scalar
     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
       if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
@@ -1398,6 +1399,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (instrumentWithCalls(ConvertedShadow) &&
         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+      // ZExt cannot convert between vector and scalar
+      ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
       Value *ConvertedShadow2 =
           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
       CallBase *CB = IRB.CreateCall(

>From cadc1e36b4407e1d7664eec70acd81eb8aead903 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 3 Jul 2024 18:11:11 +0000
Subject: [PATCH 2/2] Update test case output

---
 .../vector-track-origins-neon.ll              | 45 +++++++------------
 .../vector-track-origins-struct.ll            | 23 ++++------
 2 files changed, 26 insertions(+), 42 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
index 0fe842e28ff92..98a9cdc696049 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
@@ -1,17 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build-release/bin/opt --version 5
-; RUN: opt < %s -S -passes="msan<eager-checks;track-origins=2>" -msan-instrumentation-with-call-threshold=0 -disable-verify | FileCheck %s
-;
-; UNSUPPORTED: target={{.*}}
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 5
+; RUN: opt < %s -S -passes="msan<eager-checks;track-origins=2>" -msan-instrumentation-with-call-threshold=0 | FileCheck %s
 ;
 ; This test illustrates a bug in MemorySanitizer that will shortly be fixed
 ; (https://github.com/llvm/llvm-project/pull/96722).
 ;
 ; '-msan-instrumentation-with-call-threshold=0' makes it possible to detect the
 ; bug with a short test case.
-;
-; '-disable-verify' with a release build is needed to avoid a compiler crash
-; (e.g., to autogenerate the assertions).
-;
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
 target triple = "aarch64-grtev4-linux-gnu"
@@ -28,36 +22,31 @@ define dso_local void @_Z1cv() local_unnamed_addr #0 {
 ; CHECK-NEXT:    br label %[[FOR_COND:.*]]
 ; CHECK:       [[FOR_COND]]:
 ; CHECK-NEXT:    [[_MSPHI_S:%.*]] = phi <4 x i16> [ [[_MSLD]], %[[ENTRY]] ], [ [[_MSLD3:%.*]], %[[FOR_COND]] ]
-; CHECK-NEXT:    [[_MSPHI_O:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP15:%.*]], %[[FOR_COND]] ]
+; CHECK-NEXT:    [[_MSPHI_O:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP11:%.*]], %[[FOR_COND]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <4 x i16> [ [[DOTPRE]], %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FOR_COND]] ]
 ; CHECK-NEXT:    [[_MSPHI_S1:%.*]] = phi <4 x i16> [ <i16 -1, i16 -1, i16 -1, i16 -1>, %[[ENTRY]] ], [ [[_MSLD3]], %[[FOR_COND]] ]
-; CHECK-NEXT:    [[_MSPHI_O2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP15]], %[[FOR_COND]] ]
+; CHECK-NEXT:    [[_MSPHI_O2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP11]], %[[FOR_COND]] ]
 ; CHECK-NEXT:    [[E_0:%.*]] = phi <4 x i16> [ undef, %[[ENTRY]] ], [ [[TMP5]], %[[FOR_COND]] ]
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSPHI_S1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[E_0]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-;
-; Editor's note: the following zext instructions are invalid
-; ('zext source and destination must both be a vector or neither')
-;
-; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[_MSPHI_S]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[_MSPHI_S]] to i64
 ; CHECK-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP2]], i32 zeroext [[_MSPHI_O]])
-; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[_MSPROP]] to i64
-;
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
 ; CHECK-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP3]], i32 zeroext [[_MSPHI_O2]])
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @_Z1b11__Int16x4_tS_(<4 x i16> noundef [[TMP1]], <4 x i16> noundef [[LANE]])
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
-; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[CONV]] to ptr
-; CHECK-NEXT:    [[TMP5]] = load <4 x i16>, ptr [[TMP8]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
-; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP11]], 35184372088832
-; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
-; CHECK-NEXT:    [[_MSLD3]] = load <4 x i16>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP15]] = load i32, ptr [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[CONV]] to ptr
+; CHECK-NEXT:    [[TMP5]] = load <4 x i16>, ptr [[TMP4]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[TMP7]], 35184372088832
+; CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    [[_MSLD3]] = load <4 x i16>, ptr [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP11]] = load i32, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    store <4 x i16> [[_MSLD3]], ptr inttoptr (i64 xor (i64 ptrtoint (ptr @_Z1cv to i64), i64 193514046488576) to ptr), align 8
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[_MSLD3]] to i64
-; CHECK-NEXT:    call void @__msan_maybe_store_origin_8(i64 zeroext [[TMP16]], ptr @_Z1cv, i32 zeroext [[TMP15]])
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[_MSLD3]] to i64
+; CHECK-NEXT:    call void @__msan_maybe_store_origin_8(i64 zeroext [[TMP12]], ptr @_Z1cv, i32 zeroext [[TMP11]])
 ; CHECK-NEXT:    store <4 x i16> [[TMP5]], ptr @_Z1cv, align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
 ;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-struct.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-struct.ll
index 5eae441f05eae..c6d0f8fcebc03 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-struct.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-struct.ll
@@ -1,7 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build-release/bin/opt --version 5
-; RUN: opt < %s -S -passes="msan<eager-checks;track-origins=2>" -msan-instrumentation-with-call-threshold=0 -disable-verify | FileCheck %s
-;
-; UNSUPPORTED: target={{.*}}
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 5
+; RUN: opt < %s -S -passes="msan<eager-checks;track-origins=2>" -msan-instrumentation-with-call-threshold=0 | FileCheck %s
 ;
 ; This test illustrates a bug in MemorySanitizer that will shortly be fixed
 ; (https://github.com/llvm/llvm-project/pull/96722).
@@ -9,9 +7,6 @@
 ; '-msan-instrumentation-with-call-threshold=0' makes it possible to detect the
 ; bug with a short test case.
 ;
-; '-disable-verify' with a release build is needed to avoid a compiler crash
-; (e.g., to autogenerate the assertions).
-;
 ; This is based on check-struct.ll.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -31,13 +26,13 @@ define { i32, i8 } @main() sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load { i32, i8 }, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    store { i32, i8 } zeroinitializer, ptr @__msan_retval_tls, align 8
-;
-; Editor's note: the following zext instruction is invalid
-; ('ZExt only operates on integer')
-;
-; CHECK-NEXT:    [[TMP7:%.*]] = zext { i32, i8 } [[_MSLD]] to i64
-;
-; CHECK-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP7]], i32 zeroext [[TMP6]])
+; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i8 } [[_MSLD]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i8 } [[_MSLD]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP9]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = or i1 [[TMP8]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i1 [[TMP11]] to i64
+; CHECK-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP12]], i32 zeroext [[TMP6]])
 ; CHECK-NEXT:    ret { i32, i8 } [[O]]
 ;
   %p = inttoptr i64 0 to ptr