[llvm] [msan] Handle SSE/AVX pshuf intrinsics by applying to shadow (PR #153895)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 15:47:37 PDT 2025
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/153895
llvm.x86.sse.pshuf.w(<1 x i64>, i8) and llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) are currently handled strictly (any uninitialized bit in an operand triggers a report, even when it cannot affect the result), which is suboptimal.
llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>), llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>), and llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) are currently handled heuristically via maybeHandleSimpleNomemIntrinsic, which OR's the operand shadows elementwise. That is incorrect: pshufb moves bytes according to the shuffle order, so the shadow of the data operand must be moved the same way.
Since the second argument is the shuffle order, we instrument these intrinsics using `handleIntrinsicByApplyingToShadow(..., /*trailingVerbatimArgs=*/1)`, which applies the intrinsic to the shadow of the data operand while passing the shuffle order through verbatim.
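As a concrete illustration, here is a simplified sketch distilled from the updated avx2 test below; the function and value names are invented for readability, with %s0/%s1 standing for the shadows of %a0/%a1 that MSan actually loads from its parameter TLS slots:

```llvm
; Sketch of the new shadow propagation for
;   %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
define <32 x i8> @pshufb_shadow_sketch(<32 x i8> %s0, <32 x i8> %s1, <32 x i8> %a1) {
  ; Shuffle the data shadow with the *original* shuffle order; this is what
  ; handleIntrinsicByApplyingToShadow(..., /*trailingVerbatimArgs=*/1) emits.
  %shuffled = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %s0, <32 x i8> %a1)
  ; An uninitialized shuffle order also poisons the corresponding result bytes.
  %shadow = or <32 x i8> %s1, %shuffled
  ret <32 x i8> %shadow
}

declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
```

Compare with the old heuristic, which computed `or %s0, %s1` and therefore left each shadow byte in its original lane even though pshufb moves the corresponding data byte.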
From 3b55a0332cf8b1f460a9830b36447aaf1d54e465 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 15 Aug 2025 22:43:21 +0000
Subject: [PATCH] [msan] Handle SSE/AVX pshuf intrinsics by applying to shadow
Currently, these intrinsics are handled either strictly or heuristically
using maybeHandleSimpleNomemIntrinsic, which is incorrect.
---
.../Instrumentation/MemorySanitizer.cpp | 20 +++++++++
.../X86/avx2-intrinsics-x86.ll | 5 ++-
.../X86/avx512bw-intrinsics-upgrade.ll | 11 ++---
.../X86/avx512bw-intrinsics.ll | 16 ++++----
.../MemorySanitizer/X86/mmx-intrinsics.ll | 41 ++++++++-----------
.../i386/avx2-intrinsics-i386.ll | 5 ++-
.../MemorySanitizer/i386/mmx-intrinsics.ll | 41 ++++++++-----------
7 files changed, 77 insertions(+), 62 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 21bd4164385ab..8a18b33e1428c 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5620,6 +5620,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleAVXVpermi2var(I);
break;
+ // Packed Shuffle
+ // llvm.x86.sse.pshuf.w(<1 x i64>, i8)
+ // llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>)
+ // llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
+ // llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
+ // llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
+ //
+ // The following intrinsics are auto-upgraded:
+ // llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
+ // llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
+ // llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
+ case Intrinsic::x86_avx2_pshuf_b:
+ case Intrinsic::x86_sse_pshuf_w:
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_ssse3_pshuf_b:
+ case Intrinsic::x86_avx512_pshuf_b_512:
+ handleIntrinsicByApplyingToShadow(I, I.getIntrinsicID(),
+ /*trailingVerbatimArgs=*/1);
+ break;
+
case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
handleAVX512VectorConvertFPToInt(I);
break;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index f916130fe53e5..f52c01c98d549 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -740,8 +740,9 @@ define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[TMP1]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1]])
; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[RES]]
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
index 02df9c49a010b..abbbb040edf1b 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
@@ -22,7 +22,6 @@
; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512
; - llvm.x86.avx512.permvar.hi.512
; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512
-; - llvm.x86.avx512.pshuf.b.512
; - llvm.x86.avx512.psllv.w.512, llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -1968,8 +1967,9 @@ define <64 x i8> @test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1,
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP3]]
;
@@ -1984,8 +1984,9 @@ define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP13]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[X3:%.*]] to <64 x i1>
; CHECK-NEXT: [[TMP8:%.*]] = select <64 x i1> [[TMP7]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
index 78c272c7b2c5a..00337da67af11 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
@@ -17,7 +17,6 @@
; - llvm.x86.avx512.pavg.b.512, llvm.x86.avx512.pavg.w.512
; - llvm.x86.avx512.permvar.hi.512
; - llvm.x86.avx512.pmul.hr.sw.512, llvm.x86.avx512.pmulhu.w.512, llvm.x86.avx512.pmulh.w.512
-; - llvm.x86.avx512.pshuf.b.512
; - llvm.x86.avx512.psllv.w.512
; - llvm.x86.avx512.psrav.w.512, llvm.x86.avx512.psrlv.w.512
@@ -1714,8 +1713,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) #
; CHECK-NEXT: [[TMP1:%.*]] = load <64 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
; CHECK-NEXT: store <64 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[RES]]
;
@@ -1730,8 +1730,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
; CHECK-NEXT: [[TMP6:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> [[TMP4]]
@@ -1755,8 +1756,9 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[TMP1]], <64 x i8> [[X1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <64 x i8> [[TMP2]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[X0:%.*]], <64 x i8> [[X1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[MASK_CAST]], <64 x i8> [[_MSPROP]], <64 x i8> zeroinitializer
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
index ac3bb56719038..ff8219a622c9f 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
@@ -4,8 +4,6 @@
; Handled strictly:
; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2
; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2
-; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
-; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5
; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5
; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5
@@ -2778,19 +2776,17 @@ define i64 @test21(<1 x i64> %a) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
+; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP9]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP15]]
;
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -2812,19 +2808,17 @@ define i32 @test21_2(<1 x i64> %a) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
+; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP9]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP12]] to <2 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[TMP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
+; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP15]]
;
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -3235,7 +3229,8 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP16]], <1 x i64> [[TMP17]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], [[TMP20]]
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
index 5cc56baf0e0de..37211c06c4ca0 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
@@ -780,8 +780,9 @@ define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[TMP1]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1]])
; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[RES]]
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll
index 0a3efaaea149f..cca6e7d627bf2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll
@@ -4,8 +4,6 @@
; Handled strictly:
; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2
; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2
-; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
-; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5
; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5
; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5
@@ -2849,19 +2847,17 @@ define i64 @test21(<1 x i64> %a) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
+; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP6]]
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP14]] to <4 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i64 [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x i64> [[TMP15]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP12]]
;
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -2884,19 +2880,17 @@ define i32 @test21_2(<1 x i64> %a) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
+; CHECK-NEXT: [[TMP6:%.*]] = call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP10]], i8 3)
+; CHECK-NEXT: [[TMP13:%.*]] = or <1 x i64> zeroinitializer, [[TMP6]]
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP14]] to <4 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
+; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP12]]
;
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
@@ -3319,7 +3313,8 @@ define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP16]], <1 x i64> [[TMP17]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], [[TMP20]]
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>