[llvm] [msan] Apply handleVectorReduceIntrinsic to max/min vector instructions (PR #129819)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 4 20:34:09 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Thurston Dang (thurstond)
Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin
from the default strict handling (visitInstruction) to handleVectorReduceIntrinsic.
Also adds an AllowShadowCast parameter to handleVectorReduceIntrinsic that specifies whether the return type may differ from the element type of the vector operand (in which case the reduced shadow is cast to the return type).
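For illustration, with AllowShadowCast=false the result's shadow is simply the OR-reduction of the operand's shadow, with no cast. Below is a minimal hand-written LLVM IR sketch of the intended instrumentation for llvm.vector.reduce.umax; the function and value names are hypothetical and not taken from the updated tests.

```llvm
; Sketch only: shadow propagation for an integer reduce-max under MSan.
; %shadow_v stands in for the shadow of %v (normally loaded from @__msan_param_tls).
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)

define i32 @reduce_umax_example(<4 x i32> %v, <4 x i32> %shadow_v) {
  ; The result's shadow is the OR of all element shadows; since the return
  ; type (i32) already matches the element type, no shadow cast is needed.
  %shadow_r = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shadow_v)
  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
  ; The real instrumentation would store %shadow_r to @__msan_retval_tls here.
  ret i32 %r
}
```

For the AArch64 NEON reductions, whose i32 return type is wider than the element type, AllowShadowCast=true lets the handler zero-extend the reduced shadow, as the updated CHECK lines in the patch show.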
Also updates the tests from https://github.com/llvm/llvm-project/pull/129741, https://github.com/llvm/llvm-project/pull/129810, and https://github.com/llvm/llvm-project/pull/129768.
---
Patch is 84.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129819.diff
6 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+46-18)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll (+40-99)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll (+40-99)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-umaxv.ll (+88-128)
- (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-uminv.ll (+88-128)
- (modified) llvm/test/Instrumentation/MemorySanitizer/expand-experimental-reductions.ll (+12-59)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 52e42932fc751..fdf20218635f0 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3653,14 +3653,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Instrument generic vector reduction intrinsics
// by ORing together all their fields.
//
- // The return type does not need to be the same type as the fields
+ // If AllowShadowCast is true, the return type does not need to be the same
+ // type as the fields
// e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
- void handleVectorReduceIntrinsic(IntrinsicInst &I) {
+ void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
assert(I.arg_size() == 1);
IRBuilder<> IRB(&I);
Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
- S = CreateShadowCast(IRB, S, getShadowTy(&I));
+ if (AllowShadowCast)
+ S = CreateShadowCast(IRB, S, getShadowTy(&I));
+ else
+ assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -3669,13 +3673,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
// %a1)
// shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
+ //
+ // The type of the return value, initial starting value, and elements of the
+ // vector must be identical.
void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
assert(I.arg_size() == 2);
IRBuilder<> IRB(&I);
Value *Shadow0 = getShadow(&I, 0);
Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
+ assert(Shadow0->getType() == Shadow1->getType());
Value *S = IRB.CreateOr(Shadow0, Shadow1);
+ assert(S->getType() == getShadowTy(&I));
setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -4458,21 +4467,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_mul:
- // Add reduction to scalar
- case Intrinsic::aarch64_neon_faddv:
- case Intrinsic::aarch64_neon_saddv:
- case Intrinsic::aarch64_neon_uaddv:
- // Floating-point min/max (vector)
- // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
- // but our shadow propagation is the same.
- case Intrinsic::aarch64_neon_fmaxv:
- case Intrinsic::aarch64_neon_fminv:
- case Intrinsic::aarch64_neon_fmaxnmv:
- case Intrinsic::aarch64_neon_fminnmv:
- // Sum long across vector
- case Intrinsic::aarch64_neon_saddlv:
- case Intrinsic::aarch64_neon_uaddlv:
- handleVectorReduceIntrinsic(I);
+ // Signed/Unsigned Min/Max
+ // TODO: handling similarly to AND/OR may be more precise.
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ // TODO: this has no false positives, but arguably we should check that all
+ // the bits are initialized.
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
break;
case Intrinsic::vector_reduce_fadd:
@@ -4903,6 +4908,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ // Add reduction to scalar
+ case Intrinsic::aarch64_neon_faddv:
+ case Intrinsic::aarch64_neon_saddv:
+ case Intrinsic::aarch64_neon_uaddv:
+ // Signed/Unsigned min/max (Vector)
+ // TODO: handling similarly to AND/OR may be more precise.
+ case Intrinsic::aarch64_neon_smaxv:
+ case Intrinsic::aarch64_neon_sminv:
+ case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_uminv:
+ // Floating-point min/max (vector)
+ // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
+ // but our shadow propagation is the same.
+ case Intrinsic::aarch64_neon_fmaxv:
+ case Intrinsic::aarch64_neon_fminv:
+ case Intrinsic::aarch64_neon_fmaxnmv:
+ case Intrinsic::aarch64_neon_fminnmv:
+ // Sum long across vector
+ case Intrinsic::aarch64_neon_saddlv:
+ case Intrinsic::aarch64_neon_uaddlv:
+ handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
+ break;
+
// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
index cbbd55d3e3497..632268e08022c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll
@@ -2,9 +2,6 @@
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll
-;
-; Handled suboptimally (visitInstruction):
-; - llvm.aarch64.neon.smaxv
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"
@@ -15,16 +12,12 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -39,16 +32,12 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -63,15 +52,9 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -85,16 +68,12 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i8 [[TMP4]]
;
entry:
@@ -109,16 +88,12 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
@@ -133,15 +108,9 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[VMAXV_I]]
;
entry:
@@ -156,16 +125,12 @@ define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP7]]
@@ -184,16 +149,12 @@ define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP7]]
@@ -212,15 +173,9 @@ define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]])
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP6]]
@@ -238,16 +193,12 @@ define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP7]]
@@ -266,16 +217,12 @@ define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP7]]
@@ -294,15 +241,9 @@ define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]])
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-sminv.ll b/llvm/test/Instrumentation/...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/129819