[llvm] [msan] Implement support for avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round (PR #137441)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 14:49:01 PDT 2025
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/137441
>From 6c76dd0ef15dadec18582106e05e0264a85edd27 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sat, 26 Apr 2025 03:21:19 +0000
Subject: [PATCH 1/3] [msan] Implement support for
avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
This adds a handler, visitGenericScalarHalfwordInst, which works for
mask.{add/sub/mul/div/max/min}.sh.round.
Updates the tests in https://github.com/llvm/llvm-project/pull/136260
---
.../Instrumentation/MemorySanitizer.cpp | 69 +++
.../X86/avx512fp16-intrinsics.ll | 457 +++++++++---------
2 files changed, 306 insertions(+), 220 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 8e31e8d2a4fbd..9f4708e14aa6a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4312,6 +4312,65 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ // For sh compiler intrinsics:
+ // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
+ // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
+ // A B WriteThru Mask RoundingMode
+ //
+ // if (Mask[0])
+ // DstShadow[0] = AShadow[0] | BShadow[0]
+ // else
+ // DstShadow[0] = WriteThruShadow[0]
+ //
+ // DstShadow[1..7] = AShadow[1..7]
+ void visitGenericScalarHalfwordInst(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+
+ assert(I.arg_size() == 5);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *WriteThrough = I.getOperand(2);
+ Value *Mask = I.getOperand(3);
+ Value *RoundingMode = I.getOperand(4);
+
+ // Technically, we could probably just check whether the LSB is initialized
+ insertShadowCheck(Mask, &I);
+ insertShadowCheck(RoundingMode, &I);
+
+ assert(isa<FixedVectorType>(A->getType()));
+ unsigned NumElements =
+ cast<FixedVectorType>(A->getType())->getNumElements();
+ assert(NumElements == 8);
+ assert(A->getType() == B->getType());
+ assert(B->getType() == WriteThrough->getType());
+ assert(Mask->getType()->getPrimitiveSizeInBits() == NumElements);
+ assert(RoundingMode->getType()->isIntegerTy());
+
+ Mask = IRB.CreateBitCast(
+ Mask, FixedVectorType::get(IRB.getInt1Ty(), NumElements));
+
+ Value *AShadow = getShadow(A);
+ Value *BShadow = getShadow(B);
+ Value *ABLowerShadow =
+ IRB.CreateOr(IRB.CreateExtractElement(
+ AShadow, ConstantInt::get(IRB.getInt32Ty(), 0)),
+ IRB.CreateExtractElement(
+ BShadow, ConstantInt::get(IRB.getInt32Ty(), 0)));
+ Value *WriteThroughShadow = getShadow(WriteThrough);
+ Value *WriteThroughLowerShadow = IRB.CreateExtractElement(
+ WriteThroughShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+
+ Value *DstLowerShadow = IRB.CreateSelect(
+ IRB.CreateExtractElement(Mask, ConstantInt::get(IRB.getInt32Ty(), 0)),
+ ABLowerShadow, WriteThroughLowerShadow);
+ Value *DstShadow = IRB.CreateInsertElement(
+ AShadow, DstLowerShadow, ConstantInt::get(IRB.getInt32Ty(), 0),
+ "_msprop");
+
+ setShadow(&I, DstShadow);
+ setOriginForNaryOp(I);
+ }
+
// Handle Arm NEON vector load intrinsics (vld*).
//
// The WithLane instructions (ld[234]lane) are similar to:
@@ -5041,6 +5100,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ case Intrinsic::x86_avx512fp16_mask_add_sh_round:
+ case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
+ case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
+ case Intrinsic::x86_avx512fp16_mask_div_sh_round:
+ case Intrinsic::x86_avx512fp16_mask_max_sh_round:
+ case Intrinsic::x86_avx512fp16_mask_min_sh_round: {
+ visitGenericScalarHalfwordInst(I);
+ break;
+ }
+
case Intrinsic::fshl:
case Intrinsic::fshr:
handleFunnelShift(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
index 61a32e5e2042e..b11b21da492d2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
@@ -13,9 +13,6 @@
; - llvm.x86.avx512fp16.mask.getexp.sh
; - llvm.x86.avx512fp16.mask.getmant.ph.512
; - llvm.x86.avx512fp16.mask.getmant.sh
-; - llvm.x86.avx512fp16.mask.max.sh.round
-; - llvm.x86.avx512fp16.mask.min.sh.round
-; - llvm.x86.avx512fp16.mask.mul.sh.round
; - llvm.x86.avx512fp16.mask.rcp.ph.512
; - llvm.x86.avx512fp16.mask.rcp.sh
; - llvm.x86.avx512fp16.mask.reduce.ph.512
@@ -27,7 +24,6 @@
; - llvm.x86.avx512fp16.mask.scalef.ph.512
; - llvm.x86.avx512fp16.mask.scalef.sh
; - llvm.x86.avx512fp16.mask.sqrt.sh
-; - llvm.x86.avx512fp16.mask.sub.sh.round
; - llvm.x86.avx512fp16.mask.vcvtph2dq.512
; - llvm.x86.avx512fp16.mask.vcvtph2qq.512
; - llvm.x86.avx512fp16.mask.vcvtph2udq.512
@@ -1393,8 +1389,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1409,54 +1405,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK: [[BB13]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[BB30]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB40]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x half> [[RES3]]
;
%val.half = load half,ptr %ptr
@@ -1476,8 +1475,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1492,54 +1491,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK: [[BB13]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[BB30]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB40]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x half> [[RES3]]
;
%val.half = load half,ptr %ptr
@@ -1559,8 +1561,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1575,54 +1577,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK: [[BB13]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[BB30]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB40]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x half> [[RES3]]
;
%val.half = load half,ptr %ptr
@@ -1642,8 +1647,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1658,54 +1663,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK: [[BB13]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[BB30]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB40]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x half> [[RES3]]
;
%val.half = load half,ptr %ptr
@@ -1725,8 +1733,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1741,54 +1749,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK: [[BB13]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[BB30]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB40]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x half> [[RES3]]
;
%val.half = load half,ptr %ptr
@@ -1808,8 +1819,8 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
@@ -1824,54 +1835,57 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSLD:%.*]] = load i16, ptr [[TMP10]], align 2
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[_MSLD]], i32 0
; CHECK-NEXT: [[VAL:%.*]] = insertelement <8 x half> poison, half [[VAL_HALF]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
-; CHECK: [[BB13]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 true, i16 [[TMP13]], i16 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
+; CHECK: [[BB22]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSCMP8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: br i1 [[_MSCMP9]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; CHECK: [[BB30]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP22]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP23]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP14]], label %[[BB39:.*]], label %[[BB40:.*]], !prof [[PROF1]]
+; CHECK: [[BB39]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB40]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES2]], <8 x half> [[VAL]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x half> [[RES3]]
;
%val.half = load half,ptr %ptr
@@ -3246,3 +3260,6 @@ define <32 x half> @test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind
}
attributes #0 = { sanitize_memory }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
>From 24b7e868e389473461409b74b34b166f6f2a8f4c Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sat, 26 Apr 2025 06:35:38 +0000
Subject: [PATCH 2/3] Clarify comment
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 9f4708e14aa6a..12494f66e686e 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4312,7 +4312,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- // For sh compiler intrinsics:
+ // For sh.* compiler intrinsics:
// llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
// (<8 x half>, <8 x half>, <8 x half>, i8, i32)
// A B WriteThru Mask RoundingMode
>From 33ee8544bb15376f861676a5a4c28d706671123f Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Mon, 28 Apr 2025 21:44:08 +0000
Subject: [PATCH 3/3] Resolve ambiguity in LLVM instruction order
---
.../Instrumentation/MemorySanitizer.cpp | 32 ++++++++---------
.../X86/avx512fp16-intrinsics.ll | 36 +++++++++----------
2 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 12494f66e686e..21f601c84892d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4313,15 +4313,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
// For sh.* compiler intrinsics:
- // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
- // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
- // A B WriteThru Mask RoundingMode
- //
- // if (Mask[0])
- // DstShadow[0] = AShadow[0] | BShadow[0]
- // else
- // DstShadow[0] = WriteThruShadow[0]
+ // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
+ // (<8 x half>, <8 x half>, <8 x half>, i8, i32)
+ // A B WriteThru Mask RoundingMode
//
+ // DstShadow[0] = Mask[0] ? (AShadow[0] | BShadow[0]) : WriteThruShadow[0]
// DstShadow[1..7] = AShadow[1..7]
void visitGenericScalarHalfwordInst(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
@@ -4348,21 +4344,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Mask = IRB.CreateBitCast(
Mask, FixedVectorType::get(IRB.getInt1Ty(), NumElements));
+ Value *MaskLower =
+ IRB.CreateExtractElement(Mask, ConstantInt::get(IRB.getInt32Ty(), 0));
Value *AShadow = getShadow(A);
+ Value *AShadowLower = IRB.CreateExtractElement(
+ AShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+
Value *BShadow = getShadow(B);
- Value *ABLowerShadow =
- IRB.CreateOr(IRB.CreateExtractElement(
- AShadow, ConstantInt::get(IRB.getInt32Ty(), 0)),
- IRB.CreateExtractElement(
- BShadow, ConstantInt::get(IRB.getInt32Ty(), 0)));
+ Value *BShadowLower = IRB.CreateExtractElement(
+ BShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+
+ Value *ABLowerShadow = IRB.CreateOr(AShadowLower, BShadowLower);
+
Value *WriteThroughShadow = getShadow(WriteThrough);
Value *WriteThroughLowerShadow = IRB.CreateExtractElement(
WriteThroughShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
- Value *DstLowerShadow = IRB.CreateSelect(
- IRB.CreateExtractElement(Mask, ConstantInt::get(IRB.getInt32Ty(), 0)),
- ABLowerShadow, WriteThroughLowerShadow);
+ Value *DstLowerShadow =
+ IRB.CreateSelect(MaskLower, ABLowerShadow, WriteThroughLowerShadow);
Value *DstShadow = IRB.CreateInsertElement(
AShadow, DstLowerShadow, ConstantInt::get(IRB.getInt32Ty(), 0),
"_msprop");
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
index b11b21da492d2..9770794357a20 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512fp16-intrinsics.ll
@@ -1412,11 +1412,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1427,10 +1427,10 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1441,11 +1441,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_add_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.add.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1498,11 +1498,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1513,10 +1513,10 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1527,11 +1527,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_sub_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.sub.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1584,11 +1584,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1599,10 +1599,10 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1613,11 +1613,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_mul_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1670,11 +1670,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1685,10 +1685,10 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1699,11 +1699,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_div_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.div.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1756,11 +1756,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1771,10 +1771,10 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1785,11 +1785,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_min_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.min.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1842,11 +1842,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP14]], i32 0
; CHECK-NEXT: [[RES0:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[X1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[_MSPROP1]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP15]], i32 0
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i16 [[TMP18]], i16 [[TMP19]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP21]], i32 0
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1857,10 +1857,10 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB23]]:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES0]], <8 x half> [[X2]], <8 x half> [[SRC]], i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i16> [[_MSPROP2]], i32 0
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i1> [[TMP24]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i16 [[TMP27]], i16 0
; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP29]], i32 0
; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP5]], 0
@@ -1871,11 +1871,11 @@ define <8 x half> @test_int_x86_avx512fp16_mask_max_sh(<8 x half> %x1, <8 x half
; CHECK: [[BB31]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x half> @llvm.x86.avx512fp16.mask.max.sh.round(<8 x half> [[RES1]], <8 x half> [[X2]], <8 x half> zeroinitializer, i8 [[MASK]], i32 4)
; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i16> [[_MSPROP3]], i32 0
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i16> [[_MSPROP]], i32 0
; CHECK-NEXT: [[TMP35:%.*]] = or i16 [[TMP33]], [[TMP34]]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i16> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i1> [[TMP32]], i32 0
; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i16 [[TMP35]], i16 [[TMP36]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP38]], i32 0
; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i8 [[TMP5]], 0
More information about the llvm-commits
mailing list