[llvm] [SLP]Support minbitwidth analysis for buildvector nodes. (PR #88504)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 12 04:46:40 PDT 2024
https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/88504
Metric: size..text
Program size..text
exp ref diff
test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test 42906.00 42986.00 0.2%
test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test 42909.00 42989.00 0.2%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664581.00 664661.00 0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664581.00 664661.00 0.0%
Less is better.
Replaces buildvector <p x in> + trunc <p x in> to <p x im> sequences with
a buildvector <p x im> of { trunc in to im } scalars, which is free in
most cases and results in better code.
>From f6e3c5f0f1743f52f11ca755d6a3175a7840d0a0 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Fri, 12 Apr 2024 11:46:29 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 328 +++++++++++-------
...ather-buildvector-with-minbitwidth-user.ll | 7 +-
.../AArch64/gather-with-minbith-user.ll | 3 +-
.../AArch64/user-node-not-in-bitwidths.ll | 7 +-
.../SystemZ/minbitwidth-root-trunc.ll | 6 +-
.../X86/minbitwidth-node-with-multi-users.ll | 3 +-
6 files changed, 217 insertions(+), 137 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index df891371fdf758..9d25a8ef3ec09e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2465,12 +2465,12 @@ class BoUpSLP {
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
template <typename BVTy, typename ResTy, typename... Args>
- ResTy processBuildVector(const TreeEntry *E, Args &...Params);
+ ResTy processBuildVector(const TreeEntry *E, Type *ScalarTy, Args &...Params);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
- Value *createBuildVector(const TreeEntry *E);
+ Value *createBuildVector(const TreeEntry *E, Type *ScalarTy);
/// Returns the instruction in the bundle, which can be used as a base point
/// for scheduling. Usually it is the last instruction in the bundle, except
@@ -2534,7 +2534,8 @@ class BoUpSLP {
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
/// \param ForPoisonSrc true if initial vector is poison, false otherwise.
- InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc) const;
+ InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
+ Type *ScalarTy) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@@ -2542,7 +2543,7 @@ class BoUpSLP {
/// \returns a vector from a collection of scalars in \p VL. if \p Root is not
/// specified, the starting vector value is poison.
- Value *gather(ArrayRef<Value *> VL, Value *Root);
+ Value *gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
@@ -7760,6 +7761,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool IsFinalized = false;
SmallVector<int> CommonMask;
SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
+ Type *ScalarTy = nullptr;
const TargetTransformInfo &TTI;
InstructionCost Cost = 0;
SmallDenseSet<Value *> VectorizedVals;
@@ -7789,13 +7791,13 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
return TTI::TCC_Free;
- auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL.begin(), VL.end());
// Improve gather cost for gather of loads, if we can group some of the
// loads into vector loads.
InstructionsState S = getSameOpcode(VL, *R.TLI);
- const unsigned Sz = R.DL->getTypeSizeInBits(VL.front()->getType());
+ const unsigned Sz = R.DL->getTypeSizeInBits(ScalarTy);
unsigned MinVF = R.getMinVF(2 * Sz);
if (VL.size() > 2 &&
((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
@@ -7809,7 +7811,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}))) &&
!all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
!isSplat(Gathers)) {
- InstructionCost BaseCost = R.getGatherCost(Gathers, !Root);
+ InstructionCost BaseCost = R.getGatherCost(Gathers, !Root, ScalarTy);
SetVector<Value *> VectorizedLoads;
SmallVector<std::pair<unsigned, LoadsState>> VectorizedStarts;
SmallVector<unsigned> ScatterVectorized;
@@ -7936,7 +7938,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
VecTy, Mask, CostKind);
}
} else {
- GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true);
+ GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true,
+ PointerOps.front()->getType());
}
}
if (NeedInsertSubvectorAnalysis) {
@@ -7970,18 +7973,19 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
transform(VL, ShuffleMask.begin(), [](Value *V) {
return isa<PoisonValue>(V) ? PoisonMaskElem : 0;
});
- InstructionCost InsertCost = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, 0,
- PoisonValue::get(VecTy), *It);
- return InsertCost +
- TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
- ShuffleMask, CostKind, /*Index=*/0,
- /*SubTp=*/nullptr, /*Args=*/*It);
+ InstructionCost InsertCost =
+ TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0,
+ PoisonValue::get(VecTy), *It);
+ return InsertCost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTy, ShuffleMask, CostKind,
+ /*Index=*/0, /*SubTp=*/nullptr,
+ /*Args=*/*It);
}
return GatherCost +
(all_of(Gathers, IsaPred<UndefValue>)
? TTI::TCC_Free
- : R.getGatherCost(Gathers, !Root && VL.equals(Gathers)));
+ : R.getGatherCost(Gathers, !Root && VL.equals(Gathers),
+ ScalarTy));
};
/// Compute the cost of creating a vector containing the extracted values from
@@ -8001,8 +8005,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Sz;
return std::max(Sz, VecTy->getNumElements());
});
- unsigned NumSrcRegs = TTI.getNumberOfParts(
- FixedVectorType::get(VL.front()->getType(), NumElts));
+ unsigned NumSrcRegs =
+ TTI.getNumberOfParts(FixedVectorType::get(ScalarTy, NumElts));
if (NumSrcRegs == 0)
NumSrcRegs = 1;
// FIXME: this must be moved to TTI for better estimation.
@@ -8048,17 +8052,16 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask);
if (!RegShuffleKind) {
- Cost += ::getShuffleCost(
- TTI, *ShuffleKinds[Part],
- FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
+ Cost += ::getShuffleCost(TTI, *ShuffleKinds[Part],
+ FixedVectorType::get(ScalarTy, NumElts),
+ MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
- Cost += ::getShuffleCost(
- TTI, *RegShuffleKind,
- FixedVectorType::get(VL.front()->getType(), EltsPerVector),
- SubMask);
+ Cost += ::getShuffleCost(TTI, *RegShuffleKind,
+ FixedVectorType::get(ScalarTy, EltsPerVector),
+ SubMask);
}
}
return Cost;
@@ -8175,6 +8178,48 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
SmallVector<int> CommonMask(Mask.begin(), Mask.end());
Value *V1 = P1.dyn_cast<Value *>(), *V2 = P2.dyn_cast<Value *>();
unsigned CommonVF = Mask.size();
+ InstructionCost ExtraCost = 0;
+ auto GetNodeMinBWAffectedCost = [&](const TreeEntry &E,
+ unsigned VF) -> InstructionCost {
+ if (E.State == TreeEntry::NeedToGather && allConstant(E.Scalars))
+ return TTI::TCC_Free;
+ Type *EScalarTy = E.Scalars.front()->getType();
+ bool IsSigned = true;
+ if (auto It = R.MinBWs.find(&E); It != R.MinBWs.end()) {
+ EScalarTy = IntegerType::get(EScalarTy->getContext(), It->second.first);
+ IsSigned = It->second.second;
+ }
+ if (EScalarTy != ScalarTy) {
+ unsigned CastOpcode = Instruction::Trunc;
+ unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
+ if (DstSz > SrcSz)
+ CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ return TTI.getCastInstrCost(
+ CastOpcode, FixedVectorType::get(ScalarTy, VF),
+ FixedVectorType::get(EScalarTy, VF), TTI::CastContextHint::None,
+ TTI::TCK_RecipThroughput);
+ }
+ return TTI::TCC_Free;
+ };
+ auto GetValueMinBWAffectedCost = [&](const Value *V) -> InstructionCost {
+ if (isa<Constant>(V))
+ return TTI::TCC_Free;
+ auto *VecTy = cast<VectorType>(V->getType());
+ Type *EScalarTy = VecTy->getElementType();
+ if (EScalarTy != ScalarTy) {
+ bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*R.DL));
+ unsigned CastOpcode = Instruction::Trunc;
+ unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
+ if (DstSz > SrcSz)
+ CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ return TTI.getCastInstrCost(
+ CastOpcode, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ VecTy, TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
+ }
+ return TTI::TCC_Free;
+ };
if (!V1 && !V2 && !P2.isNull()) {
// Shuffle 2 entry nodes.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8201,11 +8246,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
}
CommonVF = E->Scalars.size();
+ ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF) +
+ GetNodeMinBWAffectedCost(*E2, CommonVF);
+ } else {
+ ExtraCost += GetNodeMinBWAffectedCost(*E, E->getVectorFactor()) +
+ GetNodeMinBWAffectedCost(*E2, E2->getVectorFactor());
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL, FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else if (!V1 && P2.isNull()) {
// Shuffle single entry node.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8224,8 +8272,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = E->Scalars.size();
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF);
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
// Not identity/broadcast? Try to see if the original vector is better.
if (!E->ReorderIndices.empty() && CommonVF == E->ReorderIndices.size() &&
CommonVF == CommonMask.size() &&
@@ -8242,6 +8290,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
} else if (V1 && P2.isNull()) {
// Shuffle single vector.
+ ExtraCost += GetValueMinBWAffectedCost(V1);
CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
assert(
all_of(Mask,
@@ -8268,11 +8317,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = VF;
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL,
- FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetValueMinBWAffectedCost(V1);
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(
+ *E2, std::min(CommonVF, E2->getVectorFactor()));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else if (!V1 && V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
@@ -8296,11 +8345,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = VF;
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL,
- FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(
+ *E1, std::min(CommonVF, E1->getVectorFactor()));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ ExtraCost += GetValueMinBWAffectedCost(V2);
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else {
assert(V1 && V2 && "Expected both vectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -8311,30 +8360,33 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
+ ExtraCost +=
+ GetValueMinBWAffectedCost(V1) + GetValueMinBWAffectedCost(V2);
if (V1->getType() != V2->getType()) {
- V1 = Constant::getNullValue(FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL, FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(),
- CommonVF));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ } else {
+ if (cast<VectorType>(V1->getType())->getElementType() != ScalarTy)
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ if (cast<VectorType>(V2->getType())->getElementType() != ScalarTy)
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
}
}
- InVectors.front() = Constant::getNullValue(FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(),
- CommonMask.size()));
+ InVectors.front() = Constant::getNullValue(
+ FixedVectorType::get(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
InVectors.pop_back();
- return BaseShuffleAnalysis::createShuffle<InstructionCost>(
- V1, V2, CommonMask, Builder);
+ return ExtraCost + BaseShuffleAnalysis::createShuffle<InstructionCost>(
+ V1, V2, CommonMask, Builder);
}
public:
- ShuffleCostEstimator(TargetTransformInfo &TTI,
+ ShuffleCostEstimator(Type *ScalarTy, TargetTransformInfo &TTI,
ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
SmallPtrSetImpl<Value *> &CheckedExtracts)
- : TTI(TTI), VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()),
- R(R), CheckedExtracts(CheckedExtracts) {}
+ : ScalarTy(ScalarTy), TTI(TTI),
+ VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()), R(R),
+ CheckedExtracts(CheckedExtracts) {}
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
unsigned NumParts, bool &UseVecBaseAsInput) {
@@ -8424,7 +8476,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
if (NumParts != 1 && UniqueBases.size() != 1) {
UseVecBaseAsInput = true;
VecBase = Constant::getNullValue(
- FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
+ FixedVectorType::get(ScalarTy, CommonMask.size()));
}
return VecBase;
}
@@ -8452,8 +8504,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy =
- FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8470,8 +8521,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy =
- FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8571,7 +8621,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return ConstantVector::getSplat(
ElementCount::getFixed(
cast<FixedVectorType>(Root->getType())->getNumElements()),
- getAllOnesValue(*R.DL, VL.front()->getType()));
+ getAllOnesValue(*R.DL, ScalarTy));
}
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
@@ -8717,7 +8767,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
- E, *TTI, VectorizedVals, *this, CheckedExtracts);
+ E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
@@ -10726,12 +10776,8 @@ BoUpSLP::isGatherShuffledEntry(
return Res;
}
-InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
- bool ForPoisonSrc) const {
- // Find the type of the operands in VL.
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
+ Type *ScalarTy) const {
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
@@ -10742,6 +10788,11 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost;
auto EstimateInsertCost = [&](unsigned I, Value *V) {
+ if (V->getType() != ScalarTy) {
+ Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
+ TTI::CastContextHint::None, CostKind);
+ V = nullptr;
+ }
if (!ForPoisonSrc)
Cost +=
TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
@@ -10966,7 +11017,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
-Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
+Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
// List of instructions/lanes from current block and/or the blocks which are
// part of the current loop. These instructions will be inserted at the end to
// make it possible to optimize loops and hoist invariant instructions out of
@@ -10992,14 +11043,11 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos,
Type *Ty) {
Value *Scalar = V;
- if (cast<VectorType>(Vec->getType())->getElementType() != Ty) {
- assert(V->getType()->isIntegerTy() && Ty->isIntegerTy() &&
+ if (Scalar->getType() != Ty) {
+ assert(Scalar->getType()->isIntegerTy() && Ty->isIntegerTy() &&
"Expected integer types only.");
- Vec = Builder.CreateIntCast(
- Vec,
- VectorType::get(Ty,
- cast<VectorType>(Vec->getType())->getElementCount()),
- !isKnownNonNegative(Vec, SimplifyQuery(*DL)));
+ Scalar = Builder.CreateIntCast(
+ Scalar, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
}
Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos));
@@ -11027,10 +11075,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
}
return Vec;
};
- Value *Val0 =
- isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
- Type *ScalarTy = Val0->getType();
- FixedVectorType *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
Value *Vec = Root ? Root : PoisonValue::get(VecTy);
SmallVector<int> NonConsts;
// Insert constant values at first.
@@ -11109,6 +11154,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// resulting shuffle and the second operand sets to be the newly added
/// operand. The \p CommonMask is transformed in the proper way after that.
SmallVector<Value *, 2> InVectors;
+ Type *ScalarTy = nullptr;
IRBuilderBase &Builder;
BoUpSLP &R;
@@ -11219,9 +11265,20 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
CommonMask[Idx] = Idx;
}
+ /// Cast value \p V to the vector type with the same number of elements, but
+ /// the base type \p ScalarTy.
+ Value *castToScalarTyElem(Value *V) {
+ auto *VecTy = cast<VectorType>(V->getType());
+ if (VecTy->getElementType() == ScalarTy)
+ return V;
+ return Builder.CreateIntCast(
+ V, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ !isKnownNonNegative(V, SimplifyQuery(*R.DL)));
+ }
+
public:
- ShuffleInstructionBuilder(IRBuilderBase &Builder, BoUpSLP &R)
- : Builder(Builder), R(R) {}
+ ShuffleInstructionBuilder(Type *ScalarTy, IRBuilderBase &Builder, BoUpSLP &R)
+ : ScalarTy(ScalarTy), Builder(Builder), R(R) {}
/// Adjusts extractelements after reusing them.
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
@@ -11258,8 +11315,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
continue;
R.eraseInstruction(EI);
}
- if (NumParts == 1 || UniqueBases.size() == 1)
+ if (NumParts == 1 || UniqueBases.size() == 1) {
+ VecBase = castToScalarTyElem(VecBase);
return VecBase;
+ }
UseVecBaseAsInput = true;
auto TransformToIdentity = [](MutableArrayRef<int> Mask) {
for (auto [I, Idx] : enumerate(Mask))
@@ -11296,6 +11355,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
"Expected vectors of the same size.");
PrevSize = Size;
#endif // NDEBUG
+ VecOp = castToScalarTyElem(VecOp);
Bases[SubMask[I] < Size ? 0 : 1] = VecOp;
}
if (!Bases.front())
@@ -11351,10 +11411,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
return std::nullopt;
// Postpone gather emission, will be emitted after the end of the
// process to keep correct order.
- auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(),
- E->getVectorFactor());
+ auto *ResVecTy = FixedVectorType::get(ScalarTy, E->getVectorFactor());
return Builder.CreateAlignedLoad(
- VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
+ ResVecTy,
+ PoisonValue::get(PointerType::getUnqual(ScalarTy->getContext())),
MaybeAlign());
}
/// Adds 2 input vectors (in form of tree entries) and the mask for their
@@ -11370,6 +11430,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
+ V1 = castToScalarTyElem(V1);
+ V2 = castToScalarTyElem(V2);
if (InVectors.empty()) {
InVectors.push_back(V1);
InVectors.push_back(V2);
@@ -11397,6 +11459,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
}
/// Adds another one input vector and the mask for the shuffling.
void add(Value *V1, ArrayRef<int> Mask, bool = false) {
+ V1 = castToScalarTyElem(V1);
if (InVectors.empty()) {
if (!isa<FixedVectorType>(V1->getType())) {
V1 = createShuffle(V1, nullptr, CommonMask);
@@ -11460,7 +11523,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
}
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
Value *Root = nullptr) {
- return R.gather(VL, Root);
+ return R.gather(VL, Root, ScalarTy);
}
Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
/// Finalize emission of the shuffles.
@@ -11560,7 +11623,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
}
if (IsSameVE) {
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleInstructionBuilder ShuffleBuilder(
+ cast<VectorType>(V->getType())->getElementType(), Builder, *this);
ShuffleBuilder.add(V, Mask);
return ShuffleBuilder.finalize(std::nullopt);
};
@@ -11635,7 +11699,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
}
template <typename BVTy, typename ResTy, typename... Args>
-ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
+ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
+ Args &...Params) {
assert(E->State == TreeEntry::NeedToGather && "Expected gather node.");
unsigned VF = E->getVectorFactor();
@@ -11683,7 +11748,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
}
return true;
};
- BVTy ShuffleBuilder(Params...);
+ BVTy ShuffleBuilder(ScalarTy, Params...);
ResTy Res = ResTy();
SmallVector<int> Mask;
SmallVector<int> ExtractMask(GatheredScalars.size(), PoisonMaskElem);
@@ -11692,7 +11757,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
bool UseVecBaseAsInput = false;
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
SmallVector<SmallVector<const TreeEntry *>> Entries;
- Type *ScalarTy = GatheredScalars.front()->getType();
+ Type *OrigScalarTy = GatheredScalars.front()->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, GatheredScalars.size());
unsigned NumParts = TTI->getNumberOfParts(VecTy);
if (NumParts == 0 || NumParts >= GatheredScalars.size())
@@ -11727,7 +11792,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
GatheredScalars.size() != VF) {
Resized = true;
GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
+ PoisonValue::get(OrigScalarTy));
}
}
}
@@ -11787,12 +11852,12 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
});
}))
GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
+ PoisonValue::get(OrigScalarTy));
}
// Remove shuffled elements from list of gathers.
for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
if (Mask[I] != PoisonMaskElem)
- GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ GatheredScalars[I] = PoisonValue::get(OrigScalarTy);
}
}
}
@@ -11803,7 +11868,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
// such sequences.
bool IsSplat = IsRootPoison && isSplat(Scalars) &&
(Scalars.size() > 2 || Scalars.front() == Scalars.back());
- Scalars.append(VF - Scalars.size(), PoisonValue::get(ScalarTy));
+ Scalars.append(VF - Scalars.size(), PoisonValue::get(OrigScalarTy));
SmallVector<int> UndefPos;
DenseMap<Value *, unsigned> UniquePositions;
// Gather unique non-const values and all constant values.
@@ -11825,7 +11890,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
++NumNonConsts;
SinglePos = I;
Value *OrigV = V;
- Scalars[I] = PoisonValue::get(ScalarTy);
+ Scalars[I] = PoisonValue::get(OrigScalarTy);
if (IsSplat) {
Scalars.front() = OrigV;
ReuseMask[I] = 0;
@@ -11841,7 +11906,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
ReuseMask.assign(VF, PoisonMaskElem);
std::swap(Scalars.front(), Scalars[SinglePos]);
if (!UndefPos.empty() && UndefPos.front() == 0)
- Scalars.front() = UndefValue::get(ScalarTy);
+ Scalars.front() = UndefValue::get(OrigScalarTy);
}
ReuseMask[SinglePos] = SinglePos;
} else if (!UndefPos.empty() && IsSplat) {
@@ -11871,7 +11936,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
// Replace the undef by the poison, in the mask it is replaced by
// non-poisoned scalar already.
if (I != Pos)
- Scalars[I] = PoisonValue::get(ScalarTy);
+ Scalars[I] = PoisonValue::get(OrigScalarTy);
}
} else {
// Replace undefs by the poisons, emit broadcast and then emit
@@ -11879,7 +11944,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
for (int I : UndefPos) {
ReuseMask[I] = PoisonMaskElem;
if (isa<UndefValue>(Scalars[I]))
- Scalars[I] = PoisonValue::get(ScalarTy);
+ Scalars[I] = PoisonValue::get(OrigScalarTy);
}
NeedFreeze = true;
}
@@ -11934,9 +11999,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
} else {
IsUsedInExpr = false;
- ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
- ScalarTy, GatheredScalars.size())),
- ExtractMask, /*ForExtracts=*/true);
+ ShuffleBuilder.add(PoisonValue::get(VecTy), ExtractMask,
+ /*ForExtracts=*/true);
}
}
if (!GatherShuffles.empty()) {
@@ -12017,9 +12081,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
// contains only constant to build final vector and then shuffle.
for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) {
if (EnoughConstsForShuffle && isa<Constant>(GatheredScalars[I]))
- NonConstants[I] = PoisonValue::get(ScalarTy);
+ NonConstants[I] = PoisonValue::get(OrigScalarTy);
else
- GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ GatheredScalars[I] = PoisonValue::get(OrigScalarTy);
}
// Generate constants for final shuffle and build a mask for them.
if (!all_of(GatheredScalars, IsaPred<PoisonValue>)) {
@@ -12065,9 +12129,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
return Res;
}
-Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
- return processBuildVector<ShuffleInstructionBuilder, Value *>(E, Builder,
- *this);
+Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy) {
+ return processBuildVector<ShuffleInstructionBuilder, Value *>(E, ScalarTy,
+ Builder, *this);
}
Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
@@ -12080,18 +12144,28 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
return E->VectorizedValue;
}
+ Value *V = E->Scalars.front();
+ Type *ScalarTy = V->getType();
+ if (auto *Store = dyn_cast<StoreInst>(V))
+ ScalarTy = Store->getValueOperand()->getType();
+ else if (auto *IE = dyn_cast<InsertElementInst>(V))
+ ScalarTy = IE->getOperand(1)->getType();
+ auto It = MinBWs.find(E);
+ if (It != MinBWs.end())
+ ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
if (E->State == TreeEntry::NeedToGather) {
// Set insert point for non-reduction initial nodes.
if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
- Value *Vec = createBuildVector(E);
+ Value *Vec = createBuildVector(E, ScalarTy);
E->VectorizedValue = Vec;
return Vec;
}
bool IsReverseOrder = isReverseOrder(E->ReorderIndices);
auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy) {
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleInstructionBuilder ShuffleBuilder(ScalarTy, Builder, *this);
if (E->getOpcode() == Instruction::Store) {
ArrayRef<int> Mask =
ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),
@@ -12112,14 +12186,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
Instruction *VL0 = E->getMainOp();
- Type *ScalarTy = VL0->getType();
- if (auto *Store = dyn_cast<StoreInst>(VL0))
- ScalarTy = Store->getValueOperand()->getType();
- else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
- ScalarTy = IE->getOperand(1)->getType();
- auto It = MinBWs.find(E);
- if (It != MinBWs.end())
- ScalarTy = IntegerType::get(F->getContext(), It->second.first);
auto GetOperandSignedness = [&](unsigned Idx) {
const TreeEntry *OpE = getOperandEntry(E, Idx);
bool IsSigned = false;
@@ -12132,7 +12198,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
});
return IsSigned;
};
- auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
assert((E->ReorderIndices.empty() ||
@@ -13324,7 +13389,8 @@ Value *BoUpSLP::vectorizeTree(
else
CombinedMask2[I] = Mask[I] - VF;
}
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleInstructionBuilder ShuffleBuilder(
+ cast<VectorType>(V1->getType())->getElementType(), Builder, *this);
ShuffleBuilder.add(V1, CombinedMask1);
if (V2)
ShuffleBuilder.add(V2, CombinedMask2);
@@ -14351,13 +14417,27 @@ bool BoUpSLP::collectValuesToDemote(
return false;
bool Res = all_of(
E.Scalars, std::bind(IsPotentiallyTruncated, _1, std::ref(BitWidth)));
- // Gather demoted constant operands.
- if (Res && E.State == TreeEntry::NeedToGather &&
- all_of(E.Scalars, IsaPred<Constant>))
- ToDemote.push_back(E.Idx);
+ // Demote gathers.
+ if (Res && E.State == TreeEntry::NeedToGather) {
+ // Check the possible bases of the extractelement instructions and the
+ // final vector length.
+ SmallPtrSet<Value *, 4> UniqueBases;
+ for (Value *V : E.Scalars) {
+ auto *EE = dyn_cast<ExtractElementInst>(V);
+ if (!EE)
+ continue;
+ UniqueBases.insert(EE->getVectorOperand());
+ }
+ const unsigned VF = E.Scalars.size();
+ Type *OrigScalarTy = E.Scalars.front()->getType();
+ if (UniqueBases.size() <= 2 ||
+ TTI->getNumberOfParts(FixedVectorType::get(OrigScalarTy, VF)) ==
+ TTI->getNumberOfParts(FixedVectorType::get(
+ IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
+ ToDemote.push_back(E.Idx);
+ }
return Res;
};
- // TODO: improve handling of gathered values and others.
if (E.State == TreeEntry::NeedToGather || !Visited.insert(&E).second ||
any_of(E.Scalars, [&](Value *V) {
return all_of(V->users(), [&](User *U) {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll
index 6907724729759f..3771ec4bda88b6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll
@@ -5,12 +5,7 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i32> [[TMP0]] to <8 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i1> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[TMP4]] to <8 x i16>
-; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index d51ef0bce3a4e0..76bb882171b174 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,7 +5,8 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i32> zeroinitializer to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 0 to i1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false>, i1 [[TMP6]], i32 4
; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i1> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll
index 6404cf4a2cd1d6..2ab6e919c23b2d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll
@@ -5,7 +5,12 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 0 to i1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false>, i1 [[TMP0]], i32 4
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i1> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i1> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i1> [[TMP3]] to <8 x i16>
+; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
index 7b4e2b0ce9112e..1bb87bf6205f19 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
@@ -7,9 +7,9 @@ define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i128
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[B_PROMOTED_I]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i128> poison, i128 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP2]] to i16
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> poison, i16 [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP9]] to i64
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
index 668d3c3c8c82c5..0ab56279fe47cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
@@ -16,8 +16,7 @@ define void @test() {
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])
More information about the llvm-commits
mailing list