[llvm] [msan] Fix multiply-add-accumulate (#153927) to use ReductionFactor (PR #155748)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 22:00:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-compiler-rt-sanitizer
Author: Thurston Dang (thurstond)
<details>
<summary>Changes</summary>
https://github.com/llvm/llvm-project/pull/153927 incorrectly cast using a hardcoded reduction factor of two, rather than using the parameter.
This caused false negatives but not false positives. (The only incorrect case was a reduction factor of four; if four values {A,B,C,D} are being reduced, the result is fully zero iff {A,B} and {C,D} are both zero after pairwise reduction. If only one of those reduced pairs is zero, then the quadwise reduction is non-zero.)
---
Patch is 68.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155748.diff
9 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+6-4)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll (+9-15)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll (+12-20)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll (+36-60)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics.ll (+36-60)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics-upgrade.ll (+18-30)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512vnni-intrinsics.ll (+18-30)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx_vnni-intrinsics.ll (+12-20)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avxvnniint8-intrinsics.ll (+24-40)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b1ffe4f47f58a..a50e8b3db4d24 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3910,8 +3910,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
FixedVectorType *ImplicitReturnType = ReturnType;
// Step 1: instrument multiplication of corresponding vector elements
if (EltSizeInBits) {
- ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy(
- EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits()));
+ ImplicitReturnType = cast<FixedVectorType>(
+ getMMXVectorTy(EltSizeInBits * ReductionFactor,
+ ParamType->getPrimitiveSizeInBits()));
ParamType = cast<FixedVectorType>(
getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
@@ -3959,7 +3960,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Step 2: instrument horizontal add
// We don't need bit-precise horizontalReduce because we only want to check
- // if each pair of elements is fully zero.
+ // if each pair/quad of elements is fully zero.
// Cast to <4 x i32>.
Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType);
@@ -3969,7 +3970,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant::getNullValue(Horizontal->getType())),
ImplicitReturnType);
- // Cast it back to the required fake return type (<1 x i64>).
+ // Cast it back to the required fake return type (if MMX: <1 x i64>; for
+ // AVX, it is already correct).
if (EltSizeInBits)
OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll
index 298dc4b2c853a..802cffc8aebb1 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll
@@ -156,11 +156,9 @@ define <16 x i32> @test_mm512_dpbssd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i1> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP21:%.*]] = or <64 x i1> [[TMP20]], [[TMP19]]
; CHECK-NEXT: [[TMP22:%.*]] = sext <64 x i1> [[TMP21]] to <64 x i8>
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <32 x i16>
-; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <32 x i16> [[TMP23]], zeroinitializer
-; CHECK-NEXT: [[TMP25:%.*]] = sext <32 x i1> [[TMP24]] to <32 x i16>
-; CHECK-NEXT: [[TMP26:%.*]] = bitcast <32 x i16> [[TMP25]] to i512
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast i512 [[TMP26]] to <16 x i32>
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <16 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <16 x i32> [[TMP23]], zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP27]], [[TMP4]]
; CHECK-NEXT: [[RES:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
@@ -193,11 +191,9 @@ define <16 x i32> @test_mm512_mask_dpbssds_epi32(<16 x i32> %__W, i16 zeroext %_
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP1]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
@@ -239,11 +235,9 @@ define <16 x i32> @test_mm512_maskz_dpbssd_epi32(i16 zeroext %__U, <16 x i32> %_
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP24]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll
index e3a26ae07ac1b..491a890ee0b26 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll
@@ -265,11 +265,9 @@ define <4 x i32> @test_mm_mask_dpbssd_epi32(<4 x i32> %__W, i4 zeroext %__U, <4
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP1]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -311,11 +309,9 @@ define <4 x i32> @test_mm_maskz_dpbssds_epi32(i4 zeroext %__U, <4 x i32> %__W, <
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP24]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -357,11 +353,9 @@ define <8 x i32> @test_mm256_maskz_dpbssds_epi32(<8 x i32> %__W, i8 zeroext %__U
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP1]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -403,11 +397,9 @@ define <8 x i32> @test_mm256_mask_dpbssd_epi32(i8 zeroext %__U, <8 x i32> %__W,
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
-; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP24]]
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll
index 822e546c84bca..331d434a3e0f7 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl_vnni-intrinsics-upgrade.ll
@@ -34,11 +34,9 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
@@ -82,11 +80,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP29]], [[TMP2]]
; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -111,11 +107,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32>
; CHECK-NEXT: [[TMP50:%.*]] = or <32 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <32 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <32 x i1> [[TMP51]] to <32 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP52]] to <16 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <16 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <16 x i1> [[TMP54]] to <16 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i16> [[TMP55]] to i256
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i256 [[TMP56]] to <8 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <32 x i8> [[TMP52]] to <8 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <8 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <8 x i1> [[TMP56]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i32> [[TMP57]], [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -165,11 +159,9 @@ define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP16]] to <16 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <8 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i16> [[TMP20]] to i128
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i128 [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i8> [[TMP17]] to <4 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <4 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP19]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP22]], [[TMP23]]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
@@ -213,11 +205,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <16 x i1> [[TMP61]] to <16 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i8> [[TMP62]] to <8 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <8 x i16> [[TMP65]] to i128
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i128 [[TMP66]] to <4 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP62]] to <4 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <4 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <4 x i1> [[TMP54]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP29]], [[TMP2]]
; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -244,11 +234,9 @@ define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32>
; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i1> [[TMP47]], [[TMP48]]
; CHECK-NEXT: [[TMP51:%.*]] = or <16 x i1> [[TMP50]], [[TMP49]]
; CHECK-NEXT: [[TMP52:%.*]] = sext <16 x i1> [[TMP51]] to <16 x i8>
-; CHECK-NEXT: [[TMP53:%.*]] = bitcast <16 x i8> [[TMP52]] to <8 x i16>
-; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i16> [[TMP53]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i16>
-; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i16> [[TMP55]] to i128
-; CHECK-NEXT: [[TMP57:%.*]] = bitcast i128 [[TMP56]] to <4 x i32>
+; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i8> [[TMP52]] to <4 x i32>
+; CHECK-NEXT: [[TMP56:%.*]] = icmp ne <4 x i32> [[TMP55]], zeroinitializer
+; CHECK-NEXT: [[TMP57:%.*]] = sext <4 x i1> [[TMP56]] to <4 x i32>
; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP57]], [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X4]])
; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -300,11 +288,9 @@ define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1,
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i1> [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <16 x i16>
-; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <16 x i16> [[TMP18]], zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext <16 x i1> [[TMP19]] to <16 x i16>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i16> [[TMP20]] to i256
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i256 [[TMP21]] to <8 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <32 x i8> [[TMP17]] to <8 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP22]], [[TMP23]]
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
@@ -348,11 +334,9 @@ define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32
; CHECK-NEXT: [[TMP60:%.*]] = or <32 x i1> [[TMP38]], [[TMP58]]
; CHECK-NEXT: [[TMP61:%.*]] = or <32 x i1> [[TMP60]], [[TMP59]]
; CHECK-NEXT: [[TMP62:%.*]] = sext <32 x i1> [[TMP61]] to <32 x i8>
-; CHECK-NEXT: [[TMP63:%.*]] = bitcast <32 x i8> [[TMP62]] to <16 x i16>
-; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <16 x i16> [[TMP63]], zeroinitializer
-; CHECK-NEXT: [[TMP65:%.*]] = sext <16 x i1> [[TMP64]] to <16 x i16>
-; CHECK-NEXT: [[TMP66:%.*]] = bitcast <16 x i16> [[TMP65]] to i256
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast i256 [[TMP66]] to <8 x i32>
+; CHECK-NEXT: [[TMP53:%.*]] = bitcast <32 x i8> [[TMP62]] to <8 x i32>
+; CHECK-NEXT: [[TMP54:%.*]] = icmp ne <8 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP54]] to <8 x i32>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155748
More information about the llvm-commits
mailing list