[llvm] [VectorCombine] Pull out TargetCostKind argument to allow globally set cost kind value (PR #118652)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 03:05:58 PST 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/118652
From ce7152a96cd491d1add4b83971aabea626cbd1fc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 4 Dec 2024 14:59:51 +0000
Subject: [PATCH 1/2] [VectorCombine] Pull out TargetCostKind argument to allow
globally set cost kind value
Some prep work to allow a future patch to potentially use VectorCombine to target code size for -Os/Oz builds (setting TCK_CodeSize instead of TCK_RecipThroughput).
There's still more cleanup to do, as many get*Cost calls rely on the default TargetCostKind value (usually TCK_RecipThroughput, but not always).
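For context, a minimal, self-contained sketch of the pattern this prep work sets up (the names foldExample/runExample and the opt-size check are illustrative assumptions, not part of this patch): each fold helper takes an explicit TTI::TargetCostKind, so a single decision at the top of the run loop can switch the whole pass between throughput and code-size costing.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Each fold helper receives the cost kind instead of hard-coding
// TCK_RecipThroughput locally.
static bool foldExample(Instruction &I, const TargetTransformInfo &TTI,
                        TTI::TargetCostKind CostKind) {
  if (!I.isBinaryOp())
    return false;
  InstructionCost Cost =
      TTI.getArithmeticInstrCost(I.getOpcode(), I.getType(), CostKind);
  return Cost.isValid() && Cost == 0;
}

static void runExample(Function &F, const TargetTransformInfo &TTI) {
  // A follow-up patch could choose the kind once, e.g. for -Os/-Oz builds.
  TTI::TargetCostKind CostKind =
      F.hasOptSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      (void)foldExample(I, TTI, CostKind);
}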
---
.../Transforms/Vectorize/VectorCombine.cpp | 199 +++++++++---------
1 file changed, 100 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b9caf8c0df9be1..e655e73fa9e148 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -91,38 +91,40 @@ class VectorCombine {
// TODO: Direct calls from the top-level "run" loop use a plain "Instruction"
// parameter. That should be updated to specific sub-classes because the
// run loop was changed to dispatch on opcode.
- bool vectorizeLoadInsert(Instruction &I);
- bool widenSubvectorLoad(Instruction &I);
+ bool vectorizeLoadInsert(Instruction &I, TTI::TargetCostKind CostKind);
+ bool widenSubvectorLoad(Instruction &I, TTI::TargetCostKind CostKind);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
+ TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex) const;
bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
+ TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex);
void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
Instruction &I);
void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
Instruction &I);
- bool foldExtractExtract(Instruction &I);
- bool foldInsExtFNeg(Instruction &I);
- bool foldInsExtVectorToShuffle(Instruction &I);
- bool foldBitcastShuffle(Instruction &I);
- bool scalarizeBinopOrCmp(Instruction &I);
- bool scalarizeVPIntrinsic(Instruction &I);
- bool foldExtractedCmps(Instruction &I);
+ bool foldExtractExtract(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldInsExtFNeg(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldInsExtVectorToShuffle(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldBitcastShuffle(Instruction &I, TTI::TargetCostKind CostKind);
+ bool scalarizeBinopOrCmp(Instruction &I, TTI::TargetCostKind CostKind);
+ bool scalarizeVPIntrinsic(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldExtractedCmps(Instruction &I, TTI::TargetCostKind CostKind);
bool foldSingleElementStore(Instruction &I);
- bool scalarizeLoadExtract(Instruction &I);
- bool foldPermuteOfBinops(Instruction &I);
- bool foldShuffleOfBinops(Instruction &I);
- bool foldShuffleOfCastops(Instruction &I);
- bool foldShuffleOfShuffles(Instruction &I);
- bool foldShuffleOfIntrinsics(Instruction &I);
- bool foldShuffleToIdentity(Instruction &I);
+ bool scalarizeLoadExtract(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldPermuteOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldShuffleOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldShuffleOfCastops(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldShuffleOfShuffles(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldShuffleOfIntrinsics(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldShuffleToIdentity(Instruction &I, TTI::TargetCostKind CostKind);
bool foldShuffleFromReductions(Instruction &I);
- bool foldCastFromReductions(Instruction &I);
+ bool foldCastFromReductions(Instruction &I, TTI::TargetCostKind CostKind);
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
- bool shrinkType(Instruction &I);
+ bool shrinkType(Instruction &I, TTI::TargetCostKind CostKind);
void replaceValue(Value &Old, Value &New) {
Old.replaceAllUsesWith(&New);
@@ -172,7 +174,8 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
return true;
}
-bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
+bool VectorCombine::vectorizeLoadInsert(Instruction &I,
+ TTI::TargetCostKind CostKind) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
Value *Scalar;
@@ -249,7 +252,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
InstructionCost OldCost =
TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
OldCost +=
TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
/* Insert */ true, HasExtract, CostKind);
@@ -293,7 +295,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
/// If we are loading a vector and then inserting it into a larger vector with
/// undefined elements, try to load the larger vector and eliminate the insert.
/// This removes a shuffle in IR and may allow combining of other loaded values.
-bool VectorCombine::widenSubvectorLoad(Instruction &I) {
+bool VectorCombine::widenSubvectorLoad(Instruction &I,
+ TTI::TargetCostKind CostKind) {
// Match subvector insert of fixed vector.
auto *Shuf = cast<ShuffleVectorInst>(&I);
if (!Shuf->isIdentityWithPadding())
@@ -329,11 +332,11 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
// undef value is 0. We could add that cost if the cost model accurately
// reflects the real cost of that operation.
InstructionCost OldCost =
- TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+ TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS, CostKind);
// New pattern: load PtrOp
InstructionCost NewCost =
- TTI.getMemoryOpCost(Instruction::Load, Ty, Alignment, AS);
+ TTI.getMemoryOpCost(Instruction::Load, Ty, Alignment, AS, CostKind);
// We can aggressively convert to the vector form because the backend can
// invert this transform if it does not result in a performance win.
@@ -353,6 +356,7 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
/// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract(
ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+ TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex = InvalidIndex) const {
auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -366,7 +370,6 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
return nullptr;
Type *VecTy = Ext0->getVectorOperand()->getType();
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
InstructionCost Cost0 =
TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
@@ -405,6 +408,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
+ TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex) {
auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -436,7 +440,6 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// both sequences.
unsigned Ext0Index = Ext0IndexC->getZExtValue();
unsigned Ext1Index = Ext1IndexC->getZExtValue();
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Extract0Cost =
TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Ext0Index);
@@ -475,7 +478,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
!Ext1->hasOneUse() * Extract1Cost;
}
- ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
+ ConvertToShuffle =
+ getShuffleExtract(Ext0, Ext1, CostKind, PreferredExtractIndex);
if (ConvertToShuffle) {
if (IsBinOp && DisableBinopExtractShuffle)
return true;
@@ -589,7 +593,8 @@ void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
}
/// Match an instruction with extracted vector operands.
-bool VectorCombine::foldExtractExtract(Instruction &I) {
+bool VectorCombine::foldExtractExtract(Instruction &I,
+ TTI::TargetCostKind CostKind) {
// It is not safe to transform things like div, urem, etc. because we may
// create undefined behavior when executing those on unknown vector elements.
if (!isSafeToSpeculativelyExecute(&I))
@@ -621,7 +626,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
ExtractElementInst *ExtractToChange;
- if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
+ if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, CostKind,
+ InsertIndex))
return false;
if (ExtractToChange) {
@@ -648,7 +654,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
/// Try to replace an extract + scalar fneg + insert with a vector fneg +
/// shuffle.
-bool VectorCombine::foldInsExtFNeg(Instruction &I) {
+bool VectorCombine::foldInsExtFNeg(Instruction &I,
+ TTI::TargetCostKind CostKind) {
// Match an insert (op (extract)) pattern.
Value *DestVec;
uint64_t Index;
@@ -683,7 +690,6 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
Mask[Index] = Index + NumElts;
Type *ScalarTy = VecTy->getScalarType();
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
@@ -712,7 +718,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
-bool VectorCombine::foldBitcastShuffle(Instruction &I) {
+bool VectorCombine::foldBitcastShuffle(Instruction &I,
+ TTI::TargetCostKind CostKind) {
Value *V0, *V1;
ArrayRef<int> Mask;
if (!match(&I, m_BitCast(m_OneUse(
@@ -772,21 +779,20 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
unsigned NumOps = IsUnary ? 1 : 2;
// The new shuffle must not cost more than the old shuffle.
- TargetTransformInfo::TargetCostKind CK =
- TargetTransformInfo::TCK_RecipThroughput;
TargetTransformInfo::ShuffleKind SK =
IsUnary ? TargetTransformInfo::SK_PermuteSingleSrc
: TargetTransformInfo::SK_PermuteTwoSrc;
InstructionCost DestCost =
- TTI.getShuffleCost(SK, NewShuffleTy, NewMask, CK) +
+ TTI.getShuffleCost(SK, NewShuffleTy, NewMask, CostKind) +
(NumOps * TTI.getCastInstrCost(Instruction::BitCast, NewShuffleTy, SrcTy,
TargetTransformInfo::CastContextHint::None,
- CK));
+ CostKind));
InstructionCost SrcCost =
- TTI.getShuffleCost(SK, SrcTy, Mask, CK) +
+ TTI.getShuffleCost(SK, SrcTy, Mask, CostKind) +
TTI.getCastInstrCost(Instruction::BitCast, DestTy, OldShuffleTy,
- TargetTransformInfo::CastContextHint::None, CK);
+ TargetTransformInfo::CastContextHint::None,
+ CostKind);
if (DestCost > SrcCost || !DestCost.isValid())
return false;
@@ -802,7 +808,8 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
/// VP Intrinsics whose vector operands are both splat values may be simplified
/// into the scalar version of the operation and the result splatted. This
/// can lead to scalarization down the line.
-bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
+bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
+ TTI::TargetCostKind CostKind) {
if (!isa<VPIntrinsic>(I))
return false;
VPIntrinsic &VPI = cast<VPIntrinsic>(I);
@@ -841,7 +848,6 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
// Calculate cost of splatting both operands into vectors and the vector
// intrinsic
VectorType *VecTy = cast<VectorType>(VPI.getType());
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
SmallVector<int> Mask;
if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
Mask.resize(FVTy->getNumElements(), 0);
@@ -923,7 +929,8 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
-bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
+bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
+ TTI::TargetCostKind CostKind) {
CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
Value *Ins0, *Ins1;
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
@@ -1003,7 +1010,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
// Get cost estimate for the insert element. This cost will factor into
// both sequences.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost InsertCost = TTI.getVectorInstrCost(
Instruction::InsertElement, VecTy, CostKind, Index);
InstructionCost OldCost =
@@ -1052,7 +1058,8 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
/// Try to combine a scalar binop + 2 scalar compares of extracted elements of
/// a vector into vector operations followed by extract. Note: The SLP pass
/// may miss this pattern because of implementation problems.
-bool VectorCombine::foldExtractedCmps(Instruction &I) {
+bool VectorCombine::foldExtractedCmps(Instruction &I,
+ TTI::TargetCostKind CostKind) {
auto *BI = dyn_cast<BinaryOperator>(&I);
// We are looking for a scalar binop of booleans.
@@ -1080,7 +1087,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
auto *Ext0 = cast<ExtractElementInst>(I0);
auto *Ext1 = cast<ExtractElementInst>(I1);
- ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
+ ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
if (!ConvertToShuf)
return false;
assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
@@ -1089,13 +1096,12 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
// The original scalar pattern is:
// binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
CmpInst::Predicate Pred = P0;
- unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
- : Instruction::ICmp;
+ unsigned CmpOpcode =
+ CmpInst::isFPPredicate(Pred) ? Instruction::FCmp : Instruction::ICmp;
auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
if (!VecTy)
return false;
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Ext0Cost =
TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0),
Ext1Cost =
@@ -1331,7 +1337,8 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
}
/// Try to scalarize vector loads feeding extractelement instructions.
-bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
+bool VectorCombine::scalarizeLoadExtract(Instruction &I,
+ TTI::TargetCostKind CostKind) {
Value *Ptr;
if (!match(&I, m_Load(m_Value(Ptr))))
return false;
@@ -1386,7 +1393,6 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
}
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
OriginalCost +=
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
Index ? Index->getZExtValue() : -1);
@@ -1428,7 +1434,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
-bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
+bool VectorCombine::foldPermuteOfBinops(Instruction &I,
+ TTI::TargetCostKind CostKind) {
BinaryOperator *BinOp;
ArrayRef<int> OuterMask;
if (!match(&I,
@@ -1480,8 +1487,6 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
}
// Try to merge shuffles across the binop if the new shuffles are not costly.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
InstructionCost OldCost =
TTI.getArithmeticInstrCost(Opcode, BinOpTy, CostKind) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, BinOpTy,
@@ -1523,7 +1528,8 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
}
/// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
+bool VectorCombine::foldShuffleOfBinops(Instruction &I,
+ TTI::TargetCostKind CostKind) {
BinaryOperator *B0, *B1;
ArrayRef<int> OldMask;
if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
@@ -1575,8 +1581,6 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
}
// Try to replace a binop with a shuffle if the shuffle is not costly.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
InstructionCost OldCost =
TTI.getArithmeticInstrCost(B0->getOpcode(), BinOpTy, CostKind) +
TTI.getArithmeticInstrCost(B1->getOpcode(), BinOpTy, CostKind) +
@@ -1612,7 +1616,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
/// Try to convert "shuffle (castop), (castop)" with a shared castop operand
/// into "castop (shuffle)".
-bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
+bool VectorCombine::foldShuffleOfCastops(Instruction &I,
+ TTI::TargetCostKind CostKind) {
Value *V0, *V1;
ArrayRef<int> OldMask;
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
@@ -1672,8 +1677,6 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
FixedVectorType::get(CastSrcTy->getScalarType(), NewMask.size());
// Try to replace a castop with a shuffle if the shuffle is not costly.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
InstructionCost CostC0 =
TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
TTI::CastContextHint::None, CostKind);
@@ -1717,7 +1720,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
/// Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
/// into "shuffle x, y".
-bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
+bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
+ TTI::TargetCostKind CostKind) {
Value *V0, *V1;
UndefValue *U0, *U1;
ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
@@ -1767,8 +1771,6 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
}
// Try to merge the shuffles if the new shuffle is not costly.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
InstructionCost InnerCost0 =
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, ShuffleSrcTy,
InnerMask0, CostKind, 0, nullptr, {V0, U0}, ShufI0);
@@ -1807,7 +1809,8 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
/// Try to convert
/// "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
+bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I,
+ TTI::TargetCostKind CostKind) {
Value *V0, *V1;
ArrayRef<int> OldMask;
if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
@@ -1837,12 +1840,10 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
return false;
InstructionCost OldCost =
- TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II0),
- TTI::TCK_RecipThroughput) +
- TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II1),
- TTI::TCK_RecipThroughput) +
+ TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II0), CostKind) +
+ TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II1), CostKind) +
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, II0Ty, OldMask,
- TTI::TCK_RecipThroughput, 0, nullptr, {II0, II1}, &I);
+ CostKind, 0, nullptr, {II0, II1}, &I);
SmallVector<Type *> NewArgsTy;
InstructionCost NewCost = 0;
@@ -1854,10 +1855,10 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
NewArgsTy.push_back(FixedVectorType::get(VecTy->getElementType(),
VecTy->getNumElements() * 2));
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- VecTy, OldMask, TTI::TCK_RecipThroughput);
+ VecTy, OldMask, CostKind);
}
IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
- NewCost += TTI.getIntrinsicInstrCost(NewAttr, TTI::TCK_RecipThroughput);
+ NewCost += TTI.getIntrinsicInstrCost(NewAttr, CostKind);
LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -1923,7 +1924,7 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
}
/// Detect concat of multiple values into a vector
-static bool isFreeConcat(ArrayRef<InstLane> Item,
+static bool isFreeConcat(ArrayRef<InstLane> Item, TTI::TargetCostKind CostKind,
const TargetTransformInfo &TTI) {
auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
unsigned NumElts = Ty->getNumElements();
@@ -1934,8 +1935,7 @@ static bool isFreeConcat(ArrayRef<InstLane> Item,
// during legalization.
SmallVector<int, 16> ConcatMask(NumElts * 2);
std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
- if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
- TTI::TCK_RecipThroughput) != 0)
+ if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask, CostKind) != 0)
return false;
unsigned NumSlices = Item.size() / NumElts;
@@ -2048,7 +2048,8 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
// Starting from a shuffle, look up through operands tracking the shuffled index
// of each lane. If we can simplify away the shuffles to identities then
// do so.
-bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
+bool VectorCombine::foldShuffleToIdentity(Instruction &I,
+ TTI::TargetCostKind CostKind) {
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
if (!Ty || I.use_empty())
return false;
@@ -2189,7 +2190,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
}
}
- if (isFreeConcat(Item, TTI)) {
+ if (isFreeConcat(Item, CostKind, TTI)) {
ConcatLeafs.insert(FrontU);
continue;
}
@@ -2332,7 +2333,8 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
/// reduce(trunc(x)) -> trunc(reduce(x)).
/// reduce(sext(x)) -> sext(reduce(x)).
/// reduce(zext(x)) -> zext(reduce(x)).
-bool VectorCombine::foldCastFromReductions(Instruction &I) {
+bool VectorCombine::foldCastFromReductions(Instruction &I,
+ TTI::TargetCostKind CostKind) {
auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
return false;
@@ -2367,7 +2369,6 @@ bool VectorCombine::foldCastFromReductions(Instruction &I) {
auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
Type *ResultTy = I.getType();
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost = TTI.getArithmeticReductionCost(
ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
OldCost += TTI.getCastInstrCost(CastOpc, ReductionSrcTy, SrcTy,
@@ -2717,7 +2718,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
/// lshr((zext(x),y) -> zext(lshr(x,trunc(y)))
/// Cost model calculations takes into account if zext(x) has other users and
/// whether it can be propagated through them too.
-bool VectorCombine::shrinkType(llvm::Instruction &I) {
+bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
Value *ZExted, *OtherOperand;
if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
m_Value(OtherOperand))) &&
@@ -2746,7 +2747,6 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
// Calculate costs of leaving current IR as it is and moving ZExt operation
// later, along with adding truncates if needed
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost ZExtCost = TTI.getCastInstrCost(
Instruction::ZExt, BigTy, SmallTy,
TargetTransformInfo::CastContextHint::None, CostKind);
@@ -2809,7 +2809,8 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
/// shuffle (DstVec, SrcVec, Mask)
-bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
+bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I,
+ TTI::TargetCostKind CostKind) {
Value *DstVec, *SrcVec;
uint64_t ExtIdx, InsIdx;
if (!match(&I,
@@ -2833,7 +2834,6 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
auto *Ins = cast<InsertElementInst>(&I);
auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost OldCost =
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx) +
TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
@@ -2865,6 +2865,7 @@ bool VectorCombine::run() {
bool MadeChange = false;
auto FoldInst = [this, &MadeChange](Instruction &I) {
Builder.SetInsertPoint(&I);
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
auto Opcode = I.getOpcode();
@@ -2875,10 +2876,10 @@ bool VectorCombine::run() {
if (IsFixedVectorType) {
switch (Opcode) {
case Instruction::InsertElement:
- MadeChange |= vectorizeLoadInsert(I);
+ MadeChange |= vectorizeLoadInsert(I, CostKind);
break;
case Instruction::ShuffleVector:
- MadeChange |= widenSubvectorLoad(I);
+ MadeChange |= widenSubvectorLoad(I, CostKind);
break;
default:
break;
@@ -2888,9 +2889,9 @@ bool VectorCombine::run() {
// This transform works with scalable and fixed vectors
// TODO: Identify and allow other scalable transforms
if (isa<VectorType>(I.getType())) {
- MadeChange |= scalarizeBinopOrCmp(I);
- MadeChange |= scalarizeLoadExtract(I);
- MadeChange |= scalarizeVPIntrinsic(I);
+ MadeChange |= scalarizeBinopOrCmp(I, CostKind);
+ MadeChange |= scalarizeLoadExtract(I, CostKind);
+ MadeChange |= scalarizeVPIntrinsic(I, CostKind);
}
if (Opcode == Instruction::Store)
@@ -2907,39 +2908,39 @@ bool VectorCombine::run() {
if (IsFixedVectorType) {
switch (Opcode) {
case Instruction::InsertElement:
- MadeChange |= foldInsExtFNeg(I);
- MadeChange |= foldInsExtVectorToShuffle(I);
+ MadeChange |= foldInsExtFNeg(I, CostKind);
+ MadeChange |= foldInsExtVectorToShuffle(I, CostKind);
break;
case Instruction::ShuffleVector:
- MadeChange |= foldPermuteOfBinops(I);
- MadeChange |= foldShuffleOfBinops(I);
- MadeChange |= foldShuffleOfCastops(I);
- MadeChange |= foldShuffleOfShuffles(I);
- MadeChange |= foldShuffleOfIntrinsics(I);
- MadeChange |= foldSelectShuffle(I);
- MadeChange |= foldShuffleToIdentity(I);
+ MadeChange |= foldPermuteOfBinops(I, CostKind);
+ MadeChange |= foldShuffleOfBinops(I, CostKind);
+ MadeChange |= foldShuffleOfCastops(I, CostKind);
+ MadeChange |= foldShuffleOfShuffles(I, CostKind);
+ MadeChange |= foldShuffleOfIntrinsics(I, CostKind);
+ MadeChange |= foldSelectShuffle(I, CostKind);
+ MadeChange |= foldShuffleToIdentity(I, CostKind);
break;
case Instruction::BitCast:
- MadeChange |= foldBitcastShuffle(I);
+ MadeChange |= foldBitcastShuffle(I, CostKind);
break;
default:
- MadeChange |= shrinkType(I);
+ MadeChange |= shrinkType(I, CostKind);
break;
}
} else {
switch (Opcode) {
case Instruction::Call:
MadeChange |= foldShuffleFromReductions(I);
- MadeChange |= foldCastFromReductions(I);
+ MadeChange |= foldCastFromReductions(I, CostKind);
break;
case Instruction::ICmp:
case Instruction::FCmp:
- MadeChange |= foldExtractExtract(I);
+ MadeChange |= foldExtractExtract(I, CostKind);
break;
default:
if (Instruction::isBinaryOp(Opcode)) {
- MadeChange |= foldExtractExtract(I);
- MadeChange |= foldExtractedCmps(I);
+ MadeChange |= foldExtractExtract(I, CostKind);
+ MadeChange |= foldExtractedCmps(I, CostKind);
}
break;
}
From ee80ee8ae7e664413a4d208b7758c55576b829e7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 9 Dec 2024 11:05:42 +0000
Subject: [PATCH 2/2] [VectorCombine] Replace TargetCostKind function argument
with common class member
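This second patch effectively folds the explicit parameter back into the class itself. A minimal sketch of the idea, with assumed names (CombinerSketch, foldExample) standing in for the real VectorCombine class:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

namespace {
// The cost kind is set once at construction time, so the per-fold helpers
// no longer need an extra parameter threaded through every call.
class CombinerSketch {
public:
  CombinerSketch(const llvm::TargetTransformInfo &TTI,
                 llvm::TTI::TargetCostKind CostKind)
      : TTI(TTI), CostKind(CostKind) {}

  bool foldExample(llvm::Instruction &I) const {
    if (!I.isBinaryOp())
      return false;
    // Every cost query implicitly uses the member; switching the whole
    // combiner to TCK_CodeSize becomes a single constructor-argument change.
    llvm::InstructionCost Cost =
        TTI.getArithmeticInstrCost(I.getOpcode(), I.getType(), CostKind);
    return Cost.isValid() && Cost == 0;
  }

private:
  const llvm::TargetTransformInfo &TTI;
  llvm::TTI::TargetCostKind CostKind;
};
} // namespace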
---
.../Transforms/Vectorize/VectorCombine.cpp | 149 ++++++++----------
1 file changed, 64 insertions(+), 85 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e655e73fa9e148..9635b106c6158b 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -67,9 +67,10 @@ class VectorCombine {
public:
VectorCombine(Function &F, const TargetTransformInfo &TTI,
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
- const DataLayout *DL, bool TryEarlyFoldsOnly)
+ const DataLayout *DL, TTI::TargetCostKind CostKind,
+ bool TryEarlyFoldsOnly)
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
- TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
+ CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
bool run();
@@ -81,6 +82,7 @@ class VectorCombine {
AAResults &AA;
AssumptionCache &AC;
const DataLayout *DL;
+ TTI::TargetCostKind CostKind;
/// If true, only perform beneficial early IR transforms. Do not introduce new
/// vector operations.
@@ -91,40 +93,38 @@ class VectorCombine {
// TODO: Direct calls from the top-level "run" loop use a plain "Instruction"
// parameter. That should be updated to specific sub-classes because the
// run loop was changed to dispatch on opcode.
- bool vectorizeLoadInsert(Instruction &I, TTI::TargetCostKind CostKind);
- bool widenSubvectorLoad(Instruction &I, TTI::TargetCostKind CostKind);
+ bool vectorizeLoadInsert(Instruction &I);
+ bool widenSubvectorLoad(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
- TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex) const;
bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
- TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex);
void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
Instruction &I);
void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
Instruction &I);
- bool foldExtractExtract(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldInsExtFNeg(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldInsExtVectorToShuffle(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldBitcastShuffle(Instruction &I, TTI::TargetCostKind CostKind);
- bool scalarizeBinopOrCmp(Instruction &I, TTI::TargetCostKind CostKind);
- bool scalarizeVPIntrinsic(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldExtractedCmps(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldExtractExtract(Instruction &I);
+ bool foldInsExtFNeg(Instruction &I);
+ bool foldInsExtVectorToShuffle(Instruction &I);
+ bool foldBitcastShuffle(Instruction &I);
+ bool scalarizeBinopOrCmp(Instruction &I);
+ bool scalarizeVPIntrinsic(Instruction &I);
+ bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
- bool scalarizeLoadExtract(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldPermuteOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldShuffleOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldShuffleOfCastops(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldShuffleOfShuffles(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldShuffleOfIntrinsics(Instruction &I, TTI::TargetCostKind CostKind);
- bool foldShuffleToIdentity(Instruction &I, TTI::TargetCostKind CostKind);
+ bool scalarizeLoadExtract(Instruction &I);
+ bool foldPermuteOfBinops(Instruction &I);
+ bool foldShuffleOfBinops(Instruction &I);
+ bool foldShuffleOfCastops(Instruction &I);
+ bool foldShuffleOfShuffles(Instruction &I);
+ bool foldShuffleOfIntrinsics(Instruction &I);
+ bool foldShuffleToIdentity(Instruction &I);
bool foldShuffleFromReductions(Instruction &I);
- bool foldCastFromReductions(Instruction &I, TTI::TargetCostKind CostKind);
+ bool foldCastFromReductions(Instruction &I);
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
- bool shrinkType(Instruction &I, TTI::TargetCostKind CostKind);
+ bool shrinkType(Instruction &I);
void replaceValue(Value &Old, Value &New) {
Old.replaceAllUsesWith(&New);
@@ -174,8 +174,7 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
return true;
}
-bool VectorCombine::vectorizeLoadInsert(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
Value *Scalar;
@@ -295,8 +294,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I,
/// If we are loading a vector and then inserting it into a larger vector with
/// undefined elements, try to load the larger vector and eliminate the insert.
/// This removes a shuffle in IR and may allow combining of other loaded values.
-bool VectorCombine::widenSubvectorLoad(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::widenSubvectorLoad(Instruction &I) {
// Match subvector insert of fixed vector.
auto *Shuf = cast<ShuffleVectorInst>(&I);
if (!Shuf->isIdentityWithPadding())
@@ -356,7 +354,6 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I,
/// followed by extract from a different index.
ExtractElementInst *VectorCombine::getShuffleExtract(
ExtractElementInst *Ext0, ExtractElementInst *Ext1,
- TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex = InvalidIndex) const {
auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -408,7 +405,6 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
- TTI::TargetCostKind CostKind,
unsigned PreferredExtractIndex) {
auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -478,8 +474,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
!Ext1->hasOneUse() * Extract1Cost;
}
- ConvertToShuffle =
- getShuffleExtract(Ext0, Ext1, CostKind, PreferredExtractIndex);
+ ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
if (ConvertToShuffle) {
if (IsBinOp && DisableBinopExtractShuffle)
return true;
@@ -593,8 +588,7 @@ void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
}
/// Match an instruction with extracted vector operands.
-bool VectorCombine::foldExtractExtract(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldExtractExtract(Instruction &I) {
// It is not safe to transform things like div, urem, etc. because we may
// create undefined behavior when executing those on unknown vector elements.
if (!isSafeToSpeculativelyExecute(&I))
@@ -626,8 +620,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I,
m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
ExtractElementInst *ExtractToChange;
- if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, CostKind,
- InsertIndex))
+ if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
return false;
if (ExtractToChange) {
@@ -654,8 +647,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I,
/// Try to replace an extract + scalar fneg + insert with a vector fneg +
/// shuffle.
-bool VectorCombine::foldInsExtFNeg(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldInsExtFNeg(Instruction &I) {
// Match an insert (op (extract)) pattern.
Value *DestVec;
uint64_t Index;
@@ -718,8 +710,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I,
/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
-bool VectorCombine::foldBitcastShuffle(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldBitcastShuffle(Instruction &I) {
Value *V0, *V1;
ArrayRef<int> Mask;
if (!match(&I, m_BitCast(m_OneUse(
@@ -808,8 +799,7 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I,
/// VP Intrinsics whose vector operands are both splat values may be simplified
/// into the scalar version of the operation and the result splatted. This
/// can lead to scalarization down the line.
-bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
if (!isa<VPIntrinsic>(I))
return false;
VPIntrinsic &VPI = cast<VPIntrinsic>(I);
@@ -929,8 +919,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
/// Match a vector binop or compare instruction with at least one inserted
/// scalar operand and convert to scalar binop/cmp followed by insertelement.
-bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
Value *Ins0, *Ins1;
if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
@@ -1058,8 +1047,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
/// Try to combine a scalar binop + 2 scalar compares of extracted elements of
/// a vector into vector operations followed by extract. Note: The SLP pass
/// may miss this pattern because of implementation problems.
-bool VectorCombine::foldExtractedCmps(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldExtractedCmps(Instruction &I) {
auto *BI = dyn_cast<BinaryOperator>(&I);
// We are looking for a scalar binop of booleans.
@@ -1337,8 +1325,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
}
/// Try to scalarize vector loads feeding extractelement instructions.
-bool VectorCombine::scalarizeLoadExtract(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
Value *Ptr;
if (!match(&I, m_Load(m_Value(Ptr))))
return false;
@@ -1434,8 +1421,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I,
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
/// --> "binop (shuffle), (shuffle)".
-bool VectorCombine::foldPermuteOfBinops(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
BinaryOperator *BinOp;
ArrayRef<int> OuterMask;
if (!match(&I,
@@ -1528,8 +1514,7 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I,
}
/// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfBinops(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
BinaryOperator *B0, *B1;
ArrayRef<int> OldMask;
if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
@@ -1616,8 +1601,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I,
/// Try to convert "shuffle (castop), (castop)" with a shared castop operand
/// into "castop (shuffle)".
-bool VectorCombine::foldShuffleOfCastops(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
Value *V0, *V1;
ArrayRef<int> OldMask;
if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
@@ -1720,8 +1704,7 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I,
/// Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
/// into "shuffle x, y".
-bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
Value *V0, *V1;
UndefValue *U0, *U1;
ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
@@ -1809,8 +1792,7 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
/// Try to convert
/// "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
Value *V0, *V1;
ArrayRef<int> OldMask;
if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
@@ -2048,8 +2030,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
// Starting from a shuffle, look up through operands tracking the shuffled index
// of each lane. If we can simplify away the shuffles to identities then
// do so.
-bool VectorCombine::foldShuffleToIdentity(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
if (!Ty || I.use_empty())
return false;
@@ -2333,8 +2314,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
/// reduce(trunc(x)) -> trunc(reduce(x)).
/// reduce(sext(x)) -> sext(reduce(x)).
/// reduce(zext(x)) -> zext(reduce(x)).
-bool VectorCombine::foldCastFromReductions(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldCastFromReductions(Instruction &I) {
auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
return false;
@@ -2718,7 +2698,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
/// lshr((zext(x),y) -> zext(lshr(x,trunc(y)))
/// Cost model calculations takes into account if zext(x) has other users and
/// whether it can be propagated through them too.
-bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
+bool VectorCombine::shrinkType(Instruction &I) {
Value *ZExted, *OtherOperand;
if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
m_Value(OtherOperand))) &&
@@ -2809,8 +2789,7 @@ bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
/// shuffle (DstVec, SrcVec, Mask)
-bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I,
- TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
Value *DstVec, *SrcVec;
uint64_t ExtIdx, InsIdx;
if (!match(&I,
@@ -2865,7 +2844,6 @@ bool VectorCombine::run() {
bool MadeChange = false;
auto FoldInst = [this, &MadeChange](Instruction &I) {
Builder.SetInsertPoint(&I);
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
auto Opcode = I.getOpcode();
@@ -2876,10 +2854,10 @@ bool VectorCombine::run() {
if (IsFixedVectorType) {
switch (Opcode) {
case Instruction::InsertElement:
- MadeChange |= vectorizeLoadInsert(I, CostKind);
+ MadeChange |= vectorizeLoadInsert(I);
break;
case Instruction::ShuffleVector:
- MadeChange |= widenSubvectorLoad(I, CostKind);
+ MadeChange |= widenSubvectorLoad(I);
break;
default:
break;
@@ -2889,9 +2867,9 @@ bool VectorCombine::run() {
// This transform works with scalable and fixed vectors
// TODO: Identify and allow other scalable transforms
if (isa<VectorType>(I.getType())) {
- MadeChange |= scalarizeBinopOrCmp(I, CostKind);
- MadeChange |= scalarizeLoadExtract(I, CostKind);
- MadeChange |= scalarizeVPIntrinsic(I, CostKind);
+ MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= scalarizeVPIntrinsic(I);
}
if (Opcode == Instruction::Store)
@@ -2908,39 +2886,39 @@ bool VectorCombine::run() {
if (IsFixedVectorType) {
switch (Opcode) {
case Instruction::InsertElement:
- MadeChange |= foldInsExtFNeg(I, CostKind);
- MadeChange |= foldInsExtVectorToShuffle(I, CostKind);
+ MadeChange |= foldInsExtFNeg(I);
+ MadeChange |= foldInsExtVectorToShuffle(I);
break;
case Instruction::ShuffleVector:
- MadeChange |= foldPermuteOfBinops(I, CostKind);
- MadeChange |= foldShuffleOfBinops(I, CostKind);
- MadeChange |= foldShuffleOfCastops(I, CostKind);
- MadeChange |= foldShuffleOfShuffles(I, CostKind);
- MadeChange |= foldShuffleOfIntrinsics(I, CostKind);
- MadeChange |= foldSelectShuffle(I, CostKind);
- MadeChange |= foldShuffleToIdentity(I, CostKind);
+ MadeChange |= foldPermuteOfBinops(I);
+ MadeChange |= foldShuffleOfBinops(I);
+ MadeChange |= foldShuffleOfCastops(I);
+ MadeChange |= foldShuffleOfShuffles(I);
+ MadeChange |= foldShuffleOfIntrinsics(I);
+ MadeChange |= foldSelectShuffle(I);
+ MadeChange |= foldShuffleToIdentity(I);
break;
case Instruction::BitCast:
- MadeChange |= foldBitcastShuffle(I, CostKind);
+ MadeChange |= foldBitcastShuffle(I);
break;
default:
- MadeChange |= shrinkType(I, CostKind);
+ MadeChange |= shrinkType(I);
break;
}
} else {
switch (Opcode) {
case Instruction::Call:
MadeChange |= foldShuffleFromReductions(I);
- MadeChange |= foldCastFromReductions(I, CostKind);
+ MadeChange |= foldCastFromReductions(I);
break;
case Instruction::ICmp:
case Instruction::FCmp:
- MadeChange |= foldExtractExtract(I, CostKind);
+ MadeChange |= foldExtractExtract(I);
break;
default:
if (Instruction::isBinaryOp(Opcode)) {
- MadeChange |= foldExtractExtract(I, CostKind);
- MadeChange |= foldExtractedCmps(I, CostKind);
+ MadeChange |= foldExtractExtract(I);
+ MadeChange |= foldExtractedCmps(I);
}
break;
}
@@ -2982,7 +2960,8 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
AAResults &AA = FAM.getResult<AAManager>(F);
const DataLayout *DL = &F.getDataLayout();
- VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);
+ VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TTI::TCK_RecipThroughput,
+ TryEarlyFoldsOnly);
if (!Combiner.run())
return PreservedAnalyses::all();
PreservedAnalyses PA;