[llvm] [msan] Use horizontal add to compute shadow for horizontal sub (PR #124835)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 12:59:44 PST 2025
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/124835
This improves the handling of horizontal sub intrinsics (introduced in https://github.com/llvm/llvm-project/pull/124159) by computing their shadow with the corresponding horizontal add intrinsic, as recommended by Vitaly.
Fixes https://github.com/llvm/llvm-project/issues/124662
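To make the motivation concrete, here is a minimal standalone C++ sketch of the per-lane shadow arithmetic (illustrative only, not MSan code; the helper names hadd_shadow and hsub_shadow and the lane values are made up for this example). Subtracting two fully uninitialized shadows yields zero, so the result would be reported as initialized, which is exactly the kind of false negative this change addresses; adding them keeps the result non-zero apart from the wrap-around caveat.

#include <cstdint>
#include <cstdio>

// Per-lane shadow combination for one horizontal pair:
// 0 = initialized, all-ones = fully uninitialized.
static uint32_t hadd_shadow(uint32_t s0, uint32_t s1) { return s0 + s1; }
static uint32_t hsub_shadow(uint32_t s0, uint32_t s1) { return s0 - s1; }

int main() {
  uint32_t s0 = 0xFFFFFFFF, s1 = 0xFFFFFFFF; // both input lanes poisoned

  // 0xFFFFFFFF - 0xFFFFFFFF == 0: the shadow claims the output lane is
  // fully initialized even though both inputs were poisoned.
  printf("hsub shadow: 0x%08X\n", (unsigned)hsub_shadow(s0, s1));

  // 0xFFFFFFFF + 0xFFFFFFFF == 0xFFFFFFFE (mod 2^32): the lane stays
  // poisoned except for the single bit that wraps to zero, the accepted
  // imprecision noted in the patch comment.
  printf("hadd shadow: 0x%08X\n", (unsigned)hadd_shadow(s0, s1));
  return 0;
}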
From 5dcb768e9b983db2384592cfb4f8b74361cae7d8 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Tue, 28 Jan 2025 20:56:47 +0000
Subject: [PATCH] [msan] Use horizontal add to compute shadow for horizontal
sub
This improves the handling of horizontal sub intrinsics (introduced in https://github.com/llvm/llvm-project/pull/124159) by computing their shadow with the corresponding horizontal add intrinsic, as recommended by Vitaly.
Fixes https://github.com/llvm/llvm-project/issues/124662
---
.../Instrumentation/MemorySanitizer.cpp | 81 ++++++++++++++++---
.../MemorySanitizer/X86/avx-intrinsics-x86.ll | 4 +-
.../X86/avx2-intrinsics-x86.ll | 6 +-
.../MemorySanitizer/X86/mmx-intrinsics.ll | 6 +-
.../i386/avx-intrinsics-i386.ll | 4 +-
.../i386/avx2-intrinsics-i386.ll | 6 +-
.../MemorySanitizer/i386/mmx-intrinsics.ll | 6 +-
7 files changed, 88 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fadebc3ae42660..8f014ce195adbd 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4037,20 +4037,83 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleAVXHorizontalAddSubIntrinsic(IntrinsicInst &I) {
// Approximation only:
- // output = horizontal_add(A, B)
+ // output = horizontal_add/sub(A, B)
// => shadow[output] = horizontal_add(shadow[A], shadow[B])
//
- // - If we add/subtract two adjacent zero (initialized) shadow values, the
+ // We always use horizontal add instead of subtract, because subtracting
+ // two fully uninitialized shadows would result in a fully initialized shadow.
+ //
+ // - If we add two adjacent zero (initialized) shadow values, the
// result will always be zero i.e., no false positives.
- // - If we add/subtract two shadows, one of which is uninitialized, the
- // result will always be non-zero i.e., no false negative.
- // - However, we can have false negatives if we subtract two non-zero
- // shadows of the same value (or do an addition that wraps to zero); we
- // consider this an acceptable tradeoff for performance.
+ // - If we add two shadows, one of which is uninitialized, the
+ // result will always be non-zero i.e., no false negatives.
+ // - However, we can have false negatives if we do an addition that wraps
+ // to zero; we consider this an acceptable tradeoff for performance.
+ //
// To make shadow propagation precise, we want the equivalent of
- // "horizontal OR", but this is not available.
+ // "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
+
+ Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID();
+
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::x86_sse3_hsub_ps:
+ shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
+ break;
+
+ case Intrinsic::x86_sse3_hsub_pd:
+ shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
+ break;
+
+ case Intrinsic::x86_ssse3_phsub_d:
+ shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
+ break;
+
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
+ break;
+
+ case Intrinsic::x86_ssse3_phsub_w:
+ shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
+ break;
+
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
+ break;
+
+ case Intrinsic::x86_ssse3_phsub_sw:
+ shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
+ break;
+
+ case Intrinsic::x86_ssse3_phsub_sw_128:
+ shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
+ break;
+
+ case Intrinsic::x86_avx_hsub_pd_256:
+ shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
+ break;
+
+ case Intrinsic::x86_avx_hsub_ps_256:
+ shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
+ break;
+
+ case Intrinsic::x86_avx2_phsub_d:
+ shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
+ break;
+
+ case Intrinsic::x86_avx2_phsub_w:
+ shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
+ break;
+
+ case Intrinsic::x86_avx2_phsub_sw:
+ shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
+ break;
+
+ default:
+ break;
+ }
+
return handleIntrinsicByApplyingToShadow(
- I, /*shadowIntrinsicID=*/I.getIntrinsicID(),
+ I, shadowIntrinsicID,
/*trailingVerbatimArgs*/ 0);
}
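As a cross-check of the properties stated in the comment block above, here is a standalone model of the approximation shadow[hsub(A, B)] ~= hadd(shadow[A], shadow[B]). It is not part of the patch; the V4 type and the hadd helper are invented for this sketch and follow the 128-bit SSE3 lane layout (out = {a0 op a1, a2 op a3, b0 op b1, b2 op b3}).

#include <array>
#include <cstdint>
#include <cstdio>

using V4 = std::array<uint32_t, 4>;

// Horizontal pairwise add of two 4-lane vectors, 128-bit SSE3 layout.
static V4 hadd(const V4 &a, const V4 &b) {
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}

int main() {
  // Shadows: 0 = initialized lane, all-ones = fully uninitialized lane.
  V4 sa = {0, 0, 0xFFFFFFFF, 0}; // a[2] is poisoned
  V4 sb = {0, 0, 0, 0};          // b is fully initialized

  V4 sout = hadd(sa, sb); // shadow of hsub(a, b) under the approximation

  // Prints 0x00000000 0xFFFFFFFF 0x00000000 0x00000000: lanes built only
  // from initialized inputs stay zero (no false positives), and the lane
  // that consumes the poisoned a[2] stays non-zero (no false negative).
  for (uint32_t s : sout)
    printf("0x%08X ", (unsigned)s);
  printf("\n");
  return 0;
}

The same reasoning explains the test updates below: the shadow of each hsub intrinsic call is now computed by calling the corresponding hadd intrinsic on the operand shadows.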
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index 43f51a810d0d2b..26e9c39696f707 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -475,7 +475,7 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -494,7 +494,7 @@ define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index c68461dd367ee9..5597a9c96611f0 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -617,7 +617,7 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
@@ -633,7 +633,7 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -649,7 +649,7 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
index 61c90d0fb80d43..2b6c6ff2e2b929 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
@@ -3339,7 +3339,7 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
@@ -3379,7 +3379,7 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
@@ -3419,7 +3419,7 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
index a22ca6dd15da4d..08dd27ffcaaf12 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
@@ -491,7 +491,7 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -511,7 +511,7 @@ define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]])
; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
index 442f0c422645af..109166975eee13 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
@@ -651,7 +651,7 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
@@ -668,7 +668,7 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
@@ -685,7 +685,7 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll
index f5ef8a9837b70b..15bd1755d479bf 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll
@@ -3426,7 +3426,7 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
@@ -3467,7 +3467,7 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
@@ -3508,7 +3508,7 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>