[llvm] cb5046d - [SLP]Do not ignore undefs when trying to replace with "poisonous" shuffles
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 24 07:50:43 PDT 2024
Author: Alexey Bataev
Date: 2024-10-24T07:47:23-07:00
New Revision: cb5046da26399c0ab545c3ed764e0e68e997ff89
URL: https://github.com/llvm/llvm-project/commit/cb5046da26399c0ab545c3ed764e0e68e997ff89
DIFF: https://github.com/llvm/llvm-project/commit/cb5046da26399c0ab545c3ed764e0e68e997ff89.diff
LOG: [SLP]Do not ignore undefs when trying to replace with "poisonous" shuffles
We need to consider undefs correctly when trying to replace them with
potentially poisonous values in shuffles. Such elements should not be
silently replaced by poison values; instead, a complex analysis should be
implemented to see if it is safe to do so.
Fixes #113425
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
llvm/test/Transforms/SLPVectorizer/insertelement-across-zero.ll
llvm/test/Transforms/SLPVectorizer/revec.ll
llvm/test/Transforms/SLPVectorizer/splat-buildvector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 889c4d94ba5e55..f76e913a57d52c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9076,14 +9076,14 @@ class BaseShuffleAnalysis {
continue;
ExtMask[Idx] = SV->getMaskValue(I);
}
- bool IsOp1Undef =
- isUndefVector(SV->getOperand(0),
- buildUseMask(LocalVF, ExtMask, UseMask::FirstArg))
- .all();
- bool IsOp2Undef =
- isUndefVector(SV->getOperand(1),
- buildUseMask(LocalVF, ExtMask, UseMask::SecondArg))
- .all();
+ bool IsOp1Undef = isUndefVector</*isPoisonOnly=*/true>(
+ SV->getOperand(0),
+ buildUseMask(LocalVF, ExtMask, UseMask::FirstArg))
+ .all();
+ bool IsOp2Undef = isUndefVector</*isPoisonOnly=*/true>(
+ SV->getOperand(1),
+ buildUseMask(LocalVF, ExtMask, UseMask::SecondArg))
+ .all();
if (!IsOp1Undef && !IsOp2Undef) {
// Update mask and mark undef elems.
for (int &I : Mask) {
@@ -13305,8 +13305,17 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
return Vec;
};
auto *VecTy = getWidenedType(ScalarTy, VL.size());
- Value *Vec = Root ? Root : PoisonValue::get(VecTy);
+ Value *Vec = PoisonValue::get(VecTy);
SmallVector<int> NonConsts;
+ SmallVector<int> Mask(VL.size());
+ std::iota(Mask.begin(), Mask.end(), 0);
+ Value *OriginalRoot = Root;
+ if (auto *SV = dyn_cast_or_null<ShuffleVectorInst>(Root);
+ SV && isa<PoisonValue>(SV->getOperand(1)) &&
+ SV->getOperand(0)->getType() == VecTy) {
+ Root = SV->getOperand(0);
+ Mask.assign(SV->getShuffleMask().begin(), SV->getShuffleMask().end());
+ }
// Insert constant values at first.
for (int I = 0, E = VL.size(); I < E; ++I) {
if (PostponedIndices.contains(I))
@@ -13315,19 +13324,20 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
NonConsts.push_back(I);
continue;
}
- if (Root) {
- if (!isa<UndefValue>(VL[I])) {
- NonConsts.push_back(I);
- continue;
- }
- if (isa<PoisonValue>(VL[I]))
- continue;
- if (auto *SV = dyn_cast<ShuffleVectorInst>(Root)) {
- if (SV->getMaskValue(I) == PoisonMaskElem)
- continue;
- }
- }
+ if (isa<PoisonValue>(VL[I]))
+ continue;
Vec = CreateInsertElement(Vec, VL[I], I, ScalarTy);
+ Mask[I] = I + E;
+ }
+ if (Root) {
+ if (isa<PoisonValue>(Vec)) {
+ Vec = OriginalRoot;
+ } else {
+ Vec = Builder.CreateShuffleVector(Root, Vec, Mask);
+ if (auto *OI = dyn_cast<Instruction>(OriginalRoot);
+ OI && OI->hasNUses(0))
+ eraseInstruction(OI);
+ }
}
// Insert non-constant values.
for (int I : NonConsts)
@@ -14041,7 +14051,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
if (!ReorderMask.empty())
reorderScalars(GatheredScalars, ReorderMask);
auto FindReusedSplat = [&](MutableArrayRef<int> Mask, unsigned InputVF,
- unsigned I, unsigned SliceSize) {
+ unsigned I, unsigned SliceSize,
+ bool IsNotPoisonous) {
if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
return isa<UndefValue>(V) && !isa<PoisonValue>(V);
}))
@@ -14050,14 +14061,29 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
unsigned EdgeIdx = E->UserTreeIndices.back().EdgeIdx;
if (UserTE->getNumOperands() != 2)
return false;
- auto *It =
- find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
- return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
- return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
- }) != TE->UserTreeIndices.end();
- });
- if (It == VectorizableTree.end())
- return false;
+ if (!IsNotPoisonous) {
+ auto *It =
+ find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
+ return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
+ return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
+ }) != TE->UserTreeIndices.end();
+ });
+ if (It == VectorizableTree.end())
+ return false;
+ SmallVector<Value *> GS((*It)->Scalars.begin(), (*It)->Scalars.end());
+ if (!(*It)->ReorderIndices.empty()) {
+ inversePermutation((*It)->ReorderIndices, ReorderMask);
+ reorderScalars(GS, ReorderMask);
+ }
+ if (!all_of(zip(GatheredScalars, GS), [&](const auto &P) {
+ Value *V0 = std::get<0>(P);
+ Value *V1 = std::get<1>(P);
+ return !isa<UndefValue>(V0) || isa<PoisonValue>(V0) ||
+ (isa<UndefValue>(V0) && !isa<PoisonValue>(V0) &&
+ is_contained(E->Scalars, V1));
+ }))
+ return false;
+ }
int Idx;
if ((Mask.size() < InputVF &&
ShuffleVectorInst::isExtractSubvectorMask(Mask, InputVF, Idx) &&
@@ -14330,12 +14356,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2);
ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
} else if (Vec1) {
+ bool IsNotPoisonedVec = isGuaranteedNotToBePoison(Vec1);
IsUsedInExpr &= FindReusedSplat(
ExtractMask,
cast<FixedVectorType>(Vec1->getType())->getNumElements(), 0,
- ExtractMask.size());
+ ExtractMask.size(), IsNotPoisonedVec);
ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
- IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
+ IsNonPoisoned &= IsNotPoisonedVec;
} else {
IsUsedInExpr = false;
ShuffleBuilder.add(PoisonValue::get(VecTy), ExtractMask,
@@ -14358,12 +14385,15 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
VecMask.assign(VecMask.size(), PoisonMaskElem);
copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
if (TEs.size() == 1) {
- IsUsedInExpr &= FindReusedSplat(
- VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
+ bool IsNotPoisonedVec =
+ TEs.front()->VectorizedValue
+ ? isGuaranteedNotToBePoison(TEs.front()->VectorizedValue)
+ : true;
+ IsUsedInExpr &=
+ FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I,
+ SliceSize, IsNotPoisonedVec);
ShuffleBuilder.add(*TEs.front(), VecMask);
- if (TEs.front()->VectorizedValue)
- IsNonPoisoned &=
- isGuaranteedNotToBePoison(TEs.front()->VectorizedValue);
+ IsNonPoisoned &= IsNotPoisonedVec;
} else {
IsUsedInExpr = false;
ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask);
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index a0cb52a853b7e6..2565d5be32e20b 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -19,8 +19,7 @@ define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, <
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[VEXT165_I]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP0]], <4 x float> [[VEXT309_I]], i64 4)
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> poison, i64 4)
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[LOAD17:%.*]], i64 0)
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[LOAD17:%.*]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[FMULADD7:%.*]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP5]], <4 x float> [[FMULADD16:%.*]], i64 4)
@@ -58,8 +57,7 @@ define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, <
; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8)
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP2]], <8 x float> [[LOAD17:%.*]], i64 0)
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[LOAD17:%.*]], i64 0)
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8)
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index d822a24220df2c..65d0078080d227 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -7,8 +7,7 @@ define i32 @test() {
; CHECK-NEXT: br label [[IF_END_I87:%.*]]
; CHECK: if.end.i87:
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> poison, i64 0)
-; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP1]], <2 x i32> zeroinitializer, i64 2)
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2)
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [
; CHECK-NEXT: i32 1, label [[SW_BB509_I]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
index 3d12da79ebf952..81845fed1134c3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -16,8 +16,8 @@ define i32 @bar() local_unnamed_addr {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 6, i32 5, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[SUB102_3]], i32 12
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 undef, i32 undef, i32 poison>, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 7, i32 6, i32 5, i32 4, i32 24, i32 25, i32 26, i32 27, i32 poison, i32 29, i32 30, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[SUB102_3]], i32 12
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12>
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index a439db003f7b34..049bb2e5073fe4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -8,7 +8,7 @@ define void @fextr(ptr %ptr) {
; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, ptr undef, align 16
; CHECK-NEXT: br label [[T:%.*]]
; CHECK: t:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> <i16 poison, i16 undef, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, <8 x i32> <i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[LD]], [[TMP0]]
; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[PTR:%.*]], align 2
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/insertelement-across-zero.ll b/llvm/test/Transforms/SLPVectorizer/insertelement-across-zero.ll
index 28afa40640bf63..dbf24bc7721c95 100644
--- a/llvm/test/Transforms/SLPVectorizer/insertelement-across-zero.ll
+++ b/llvm/test/Transforms/SLPVectorizer/insertelement-across-zero.ll
@@ -8,11 +8,11 @@ define void @test(i8 %0, i8 %1) {
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr getelementptr (i8, ptr null, i32 8), align 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <16 x i32> <i32 7, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[LUPTO132421:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[TMP3]], <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 14, i32 15>
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[LUPTO132421]], i8 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[LUPTO132421]], <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 0, i32 7
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 17, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15>
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i8> zeroinitializer, [[TMP9]]
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 1d7a48981a3b9c..f32e315142767f 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -93,14 +93,12 @@ define void @test4(ptr %in, ptr %out) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP1]], <8 x float> [[TMP0]], i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 0)
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP7]], <8 x float> zeroinitializer, i64 8)
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]]
@@ -153,20 +151,14 @@ define <4 x i1> @test6(ptr %in1, ptr %in2) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[IN1:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[IN2:%.*]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> poison, i64 4)
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP2]], <4 x i32> poison, i64 8)
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP3]], <4 x i32> poison, i64 12)
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP4]], <4 x i32> [[TMP0]], i64 0)
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP7]], <4 x i32> zeroinitializer, i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP8]], <4 x i32> zeroinitializer, i64 8)
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP9]], <4 x i32> zeroinitializer, i64 12)
; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> poison, i64 4)
-; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP12]], <4 x i16> poison, i64 8)
-; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP13]], <4 x i16> poison, i64 12)
-; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP14]], <4 x i16> [[TMP1]], i64 0)
+; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> [[TMP1]], i64 0)
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i16> [[TMP15]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP17]], <4 x i16> zeroinitializer, i64 4)
@@ -301,8 +293,7 @@ define void @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr null, align 1
-; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> poison, i64 16)
-; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> [[TMP1]], <16 x i8> [[TMP0]], i64 0)
+; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i16>
diff --git a/llvm/test/Transforms/SLPVectorizer/splat-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/splat-buildvector.ll
index 7532cd728225d7..c60ef6a18cbaba 100644
--- a/llvm/test/Transforms/SLPVectorizer/splat-buildvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/splat-buildvector.ll
@@ -11,7 +11,7 @@ define i8 @foo(i64 %val_i64_57) {
; CHECK-NEXT: [[VAL_I64_58:%.*]] = call i64 @llvm.smax.i64(i64 0, i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> <i64 0, i64 poison, i64 poison, i64 0>, i64 [[VAL_I64_57]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL_I64_58]], i32 2
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> <i64 poison, i64 poison, i64 poison, i64 undef>, <4 x i32> <i32 2, i32 2, i32 2, i32 7>
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
More information about the llvm-commits
mailing list