[llvm] [msan] Handle Intrinsic::vector_reduce_f{add, mul} (PR #125615)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 3 17:23:39 PST 2025
https://github.com/thurstond created https://github.com/llvm/llvm-project/pull/125615
This adds handleVectorReduceWithStarterIntrinsic() (similar to handleVectorReduceIntrinsic, but for intrinsics that take an additional start value as their first parameter) and uses it to handle Intrinsic::vector_reduce_f{add,mul}.
Updates the tests from https://github.com/llvm/llvm-project/pull/125597.
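For a call such as call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1), the handler ORs the start value's shadow with an or-reduction of the vector operand's shadow. Roughly (a sketch; the %shadow_* names are illustrative, not what the instrumentation literally emits):

  %vec_shadow = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %shadow_a1)
  %ret_shadow = or i32 %shadow_a0, %vec_shadow

This replaces the previous strict handling, which branched to __msan_warning_noreturn whenever any operand shadow was non-zero and always stored a zero return shadow, with proper shadow propagation to the result.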
From 3656c5103773e17b60f43641954e41baa20d3ed4 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Tue, 4 Feb 2025 01:18:06 +0000
Subject: [PATCH 1/2] [msan] Handle Intrinsic::vector_reduce_f{add,mul}
This adds handleVectorReduceWithStarterIntrinsic() (similar to handleVectorReduceIntrinsic, but for intrinsics that take an additional start value as their first parameter) and uses it to handle Intrinsic::vector_reduce_f{add,mul}.
Updates the tests from https://github.com/llvm/llvm-project/pull/125597.
---
.../Instrumentation/MemorySanitizer.cpp | 17 ++
.../MemorySanitizer/vector-reduce-fadd.ll | 206 +++++-------------
.../MemorySanitizer/vector-reduce-fmul.ll | 195 ++++-------------
3 files changed, 116 insertions(+), 302 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index a4f7e43f041c389..1e2c10a9a83160d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3500,6 +3500,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(&I, 0));
}
+ // Similar to handleVectorReduceIntrinsic but with an initial starting value.
+ // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
+ // shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
+ void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Shadow0 = getShadow(&I, 0);
+ Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
+ Value *S = IRB.CreateOr(Shadow0, Shadow1);
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
// Instrument vector.reduce.or intrinsic.
// Valid (non-poisoned) set bits in the operand pull low the
// corresponding shadow bits.
@@ -4344,6 +4356,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::vector_reduce_mul:
handleVectorReduceIntrinsic(I);
break;
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ handleVectorReduceWithStarterIntrinsic(I);
+ break;
+
case Intrinsic::x86_sse_stmxcsr:
handleStmxcsr(I);
break;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll
index 306a262b1c9caa3..5da4c7357b6ad3c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fadd.ll
@@ -15,17 +15,10 @@ define float @test_v2f32(float %a0, <2 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v2f32(float [[A0]], <2 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
@@ -38,17 +31,10 @@ define float @test_v4f32(float %a0, <4 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A0]], <4 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
@@ -61,17 +47,10 @@ define float @test_v8f32(float %a0, <8 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v8f32(float [[A0]], <8 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
@@ -84,17 +63,10 @@ define float @test_v16f32(float %a0, <16 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[A0]], <16 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
@@ -107,15 +79,10 @@ define float @test_v2f32_zero(<2 x float> %a0) #0 {
; CHECK-SAME: <2 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %a0)
@@ -127,15 +94,10 @@ define float @test_v4f32_zero(<4 x float> %a0) #0 {
; CHECK-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %a0)
@@ -147,15 +109,10 @@ define float @test_v8f32_zero(<8 x float> %a0) #0 {
; CHECK-SAME: <8 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %a0)
@@ -167,15 +124,10 @@ define float @test_v16f32_zero(<16 x float> %a0) #0 {
; CHECK-SAME: <16 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a0)
@@ -188,17 +140,10 @@ define double @test_v2f64(double %a0, <2 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double [[A0]], <2 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
@@ -211,17 +156,10 @@ define double @test_v4f64(double %a0, <4 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v4f64(double [[A0]], <4 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
@@ -234,17 +172,10 @@ define double @test_v8f64(double %a0, <8 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v8f64(double [[A0]], <8 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
@@ -257,17 +188,10 @@ define double @test_v16f64(double %a0, <16 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i64> [[TMP2]] to i1024
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fadd.v16f64(double [[A0]], <16 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
@@ -280,15 +204,10 @@ define double @test_v2f64_zero(<2 x double> %a0) #0 {
; CHECK-SAME: <2 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %a0)
@@ -300,15 +219,10 @@ define double @test_v4f64_zero(<4 x double> %a0) #0 {
; CHECK-SAME: <4 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %a0)
@@ -320,15 +234,10 @@ define double @test_v8f64_zero(<8 x double> %a0) #0 {
; CHECK-SAME: <8 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v8f64(double -0.0, <8 x double> %a0)
@@ -340,15 +249,10 @@ define double @test_v16f64_zero(<16 x double> %a0) #0 {
; CHECK-SAME: <16 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i64> [[TMP1]] to i1024
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i1024 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fadd.v16f64(double -0.000000e+00, <16 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fadd.f64.v16f64(double -0.0, <16 x double> %a0)
@@ -363,15 +267,10 @@ define float @PR64627() #0 {
; CHECK-NEXT: [[TMP2:%.*]] = select <5 x i1> [[TMP1]], <5 x i32> zeroinitializer, <5 x i32> zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <5 x i1> zeroinitializer, <5 x i32> splat (i32 1065353216), <5 x i32> [[TMP2]]
; CHECK-NEXT: [[TMP3:%.*]] = select <5 x i1> [[TMP1]], <5 x float> zeroinitializer, <5 x float> splat (float 1.000000e+00)
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <5 x i32> [[_MSPROP_SELECT]] to i160
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.or.v5i32(<5 x i32> [[_MSPROP_SELECT]])
+; CHECK-NEXT: [[TMP5:%.*]] = or i32 0, [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v5f32(float -0.000000e+00, <5 x float> [[TMP3]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP7]]
;
%1 = bitcast i5 0 to <5 x i1>
@@ -392,6 +291,3 @@ declare double @llvm.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
declare double @llvm.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
attributes #0 = { sanitize_memory }
-;.
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fmul.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fmul.ll
index 4223bf49b2adc81..0c1c4edc4367f13 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fmul.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-reduce-fmul.ll
@@ -15,17 +15,10 @@ define float @test_v2f32(float %a0, <2 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmul.v2f32(float [[A0]], <2 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
@@ -38,17 +31,10 @@ define float @test_v4f32(float %a0, <4 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A0]], <4 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
@@ -61,17 +47,10 @@ define float @test_v8f32(float %a0, <8 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmul.v8f32(float [[A0]], <8 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
@@ -84,17 +63,10 @@ define float @test_v16f32(float %a0, <16 x float> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmul.v16f32(float [[A0]], <16 x float> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP6]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
@@ -107,15 +79,10 @@ define float @test_v2f32_one(<2 x float> %a0) #0 {
; CHECK-SAME: <2 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
@@ -127,15 +94,10 @@ define float @test_v4f32_one(<4 x float> %a0) #0 {
; CHECK-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
@@ -147,15 +109,10 @@ define float @test_v8f32_one(<8 x float> %a0) #0 {
; CHECK-SAME: <8 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
@@ -167,15 +124,10 @@ define float @test_v16f32_one(<16 x float> %a0) #0 {
; CHECK-SAME: <16 x float> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> [[A0]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[TMP5]]
;
%1 = call float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
@@ -188,17 +140,10 @@ define double @test_v2f64(double %a0, <2 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fmul.v2f64(double [[A0]], <2 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
@@ -211,17 +156,10 @@ define double @test_v4f64(double %a0, <4 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fmul.v4f64(double [[A0]], <4 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
@@ -234,17 +172,10 @@ define double @test_v8f64(double %a0, <8 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fmul.v8f64(double [[A0]], <8 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
@@ -257,17 +188,10 @@ define double @test_v16f64(double %a0, <16 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i64> [[TMP2]] to i1024
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.vector.reduce.fmul.v16f64(double [[A0]], <16 x double> [[A1]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP6]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
@@ -280,15 +204,10 @@ define double @test_v2f64_one(<2 x double> %a0) #0 {
; CHECK-SAME: <2 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
@@ -300,15 +219,10 @@ define double @test_v4f64_one(<4 x double> %a0) #0 {
; CHECK-SAME: <4 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
@@ -320,15 +234,10 @@ define double @test_v8f64_one(<8 x double> %a0) #0 {
; CHECK-SAME: <8 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
@@ -340,15 +249,10 @@ define double @test_v16f64_one(<16 x double> %a0) #0 {
; CHECK-SAME: <16 x double> [[A0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i64> [[TMP1]] to i1024
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i1024 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 0, [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> [[A0]])
-; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i64 [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[TMP5]]
;
%1 = call double @llvm.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
@@ -366,6 +270,3 @@ declare double @llvm.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
declare double @llvm.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
attributes #0 = { sanitize_memory }
-;.
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
-;.
From 0fffdb14cfd82fa28a9259141eb46abf8f5adcac Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Tue, 4 Feb 2025 01:23:02 +0000
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 1e2c10a9a83160d..8a56f42c5c4ca16 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3501,7 +3501,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
// Similar to handleVectorReduceIntrinsic but with an initial starting value.
- // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
+ // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
+ // %a1)
// shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
IRBuilder<> IRB(&I);