[llvm] e3056ae - [NFC][TTI] Explicit use of VectorType
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 20 01:18:59 PDT 2020
Author: Sam Parker
Date: 2020-04-20T09:16:52+01:00
New Revision: e3056ae9a05b823b77a8a32e245d5dea7b9fcef1
URL: https://github.com/llvm/llvm-project/commit/e3056ae9a05b823b77a8a32e245d5dea7b9fcef1
DIFF: https://github.com/llvm/llvm-project/commit/e3056ae9a05b823b77a8a32e245d5dea7b9fcef1.diff
LOG: [NFC][TTI] Explicit use of VectorType
The API for shuffles and reductions uses generic Type parameters,
instead of VectorType, and so assertions and casts are used a lot.
This patch makes those types explicit, which means that the clients
can't be lazy, but results in less ambiguity, and that can only be a
good thing.
Bugzilla: https://bugs.llvm.org/show_bug.cgi?id=45562
Differential Revision: https://reviews.llvm.org/D78357
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/lib/Target/ARM/ARMTargetTransformInfo.h
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 89c7c9792629..cc89bf30c2e9 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -910,8 +910,8 @@ class TargetTransformInfo {
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
- int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
- Type *SubTp = nullptr) const;
+ int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
+ VectorType *SubTp = nullptr) const;
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
@@ -989,10 +989,10 @@ class TargetTransformInfo {
/// Split:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm) const;
- int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
- bool IsUnsigned) const;
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwiseForm, bool IsUnsigned) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
@@ -1332,8 +1332,8 @@ class TargetTransformInfo::Concept {
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI = nullptr) = 0;
- virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) = 0;
+ virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -1356,9 +1356,9 @@ class TargetTransformInfo::Concept {
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace, bool UseMaskForCond = false,
bool UseMaskForGaps = false) = 0;
- virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm) = 0;
- virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+ virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm, bool IsUnsigned) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF,
@@ -1731,8 +1731,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
- int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) override {
+ int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -1775,12 +1775,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
Alignment, AddressSpace,
UseMaskForCond, UseMaskForGaps);
}
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm) override {
return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
}
- int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
- bool IsUnsigned) override {
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwiseForm, bool IsUnsigned) override {
return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a36c805f3296..35853f540b6c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -438,8 +438,8 @@ class TargetTransformInfoImplBase {
return 1;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
- Type *SubTp) {
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
+ VectorType *SubTp) {
return 1;
}
@@ -512,9 +512,9 @@ class TargetTransformInfoImplBase {
return 0;
}
- unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
+ unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; }
- unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }
+ unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; }
unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 7646c2196941..010a43dfe01f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -80,8 +80,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// Estimate a cost of Broadcast as an extract and sequence of insert
/// operations.
- unsigned getBroadcastShuffleOverhead(Type *Ty) {
- auto *VTy = cast<VectorType>(Ty);
+ unsigned getBroadcastShuffleOverhead(VectorType *VTy) {
unsigned Cost = 0;
// Broadcast cost is equal to the cost of extracting the zero'th element
// plus the cost of inserting it into every element of the result vector.
@@ -97,8 +96,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
- unsigned getPermuteShuffleOverhead(Type *Ty) {
- auto *VTy = cast<VectorType>(Ty);
+ unsigned getPermuteShuffleOverhead(VectorType *VTy) {
unsigned Cost = 0;
// Shuffle cost is equal to the cost of extracting element from its argument
// plus the cost of inserting them onto the result vector.
@@ -118,11 +116,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// Estimate a cost of subvector extraction as a sequence of extract and
/// insert operations.
- unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
- assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
+ VectorType *SubVTy) {
+ assert(VTy && SubVTy &&
"Can only extract subvectors from vectors");
- auto *VTy = cast<VectorType>(Ty);
- auto *SubVTy = cast<VectorType>(SubTy);
int NumSubElts = SubVTy->getNumElements();
assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
"SK_ExtractSubvector index out of range");
@@ -142,11 +139,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// Estimate a cost of subvector insertion as a sequence of extract and
/// insert operations.
- unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
- assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+ unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
+ VectorType *SubVTy) {
+ assert(VTy && SubVTy &&
"Can only insert subvectors into vectors");
- auto *VTy = cast<VectorType>(Ty);
- auto *SubVTy = cast<VectorType>(SubTy);
int NumSubElts = SubVTy->getNumElements();
assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
"SK_InsertSubvector index out of range");
@@ -683,8 +679,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return OpCost;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp) {
switch (Kind) {
case TTI::SK_Broadcast:
return getBroadcastShuffleOverhead(Tp);
@@ -1198,6 +1194,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
const Instruction *I = nullptr) {
auto *ConcreteTTI = static_cast<T *>(this);
+ auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
SmallVector<unsigned, 2> ISDs;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
@@ -1320,28 +1317,28 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::masked_load:
return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
case Intrinsic::experimental_vector_reduce_add:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_mul:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_and:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_or:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_xor:
- return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
/*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_v2_fadd:
return ConcreteTTI->getArithmeticReductionCost(
- Instruction::FAdd, Tys[0],
+ Instruction::FAdd, VecOpTy,
/*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
// reductions.
case Intrinsic::experimental_vector_reduce_v2_fmul:
return ConcreteTTI->getArithmeticReductionCost(
- Instruction::FMul, Tys[0],
+ Instruction::FMul, VecOpTy,
/*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
// reductions.
case Intrinsic::experimental_vector_reduce_smax:
@@ -1349,12 +1346,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
return ConcreteTTI->getMinMaxReductionCost(
- Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
+ VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+ /*IsPairwiseForm=*/false,
/*IsUnsigned=*/false);
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return ConcreteTTI->getMinMaxReductionCost(
- Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
+ VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+ /*IsPairwiseForm=*/false,
/*IsUnsigned=*/true);
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
@@ -1639,11 +1638,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
///
/// The cost model should take into account that the actual length of the
/// vector is reduced on each iteration.
- unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwise) {
- assert(Ty->isVectorTy() && "Expect a vector type");
- Type *ScalarTy = cast<VectorType>(Ty)->getElementType();
- unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+ Type *ScalarTy = Ty->getElementType();
+ unsigned NumVecElts = Ty->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
unsigned ArithCost = 0;
unsigned ShuffleCost = 0;
@@ -1655,7 +1653,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
- Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+ VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
ShuffleCost += (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
@@ -1689,12 +1687,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// Try to calculate op costs for min/max reduction operations.
/// \param CondTy Conditional type for the Select instruction.
- unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
- bool) {
- assert(Ty->isVectorTy() && "Expect a vector type");
- Type *ScalarTy = cast<VectorType>(Ty)->getElementType();
- Type *ScalarCondTy = cast<VectorType>(CondTy)->getElementType();
- unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+ unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwise, bool) {
+ Type *ScalarTy = Ty->getElementType();
+ Type *ScalarCondTy = CondTy->getElementType();
+ unsigned NumVecElts = Ty->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
unsigned CmpOpcode;
if (Ty->isFPOrFPVectorTy()) {
@@ -1714,7 +1711,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
- Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+ VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
CondTy = VectorType::get(ScalarCondTy, NumVecElts);
// Assume the pairwise shuffles add a cost.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index e8d15b730bdc..4370583eee40 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -599,8 +599,8 @@ int TargetTransformInfo::getArithmeticInstrCost(
return Cost;
}
-int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
- Type *SubTp) const {
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
+ int Index, VectorType *SubTp) const {
int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
@@ -732,14 +732,16 @@ int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
return Cost;
}
-int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
+ VectorType *Ty,
bool IsPairwiseForm) const {
int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
-int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
+int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty,
+ VectorType *CondTy,
bool IsPairwiseForm,
bool IsUnsigned) const {
int Cost =
@@ -1011,7 +1013,8 @@ static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
}
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
- unsigned &Opcode, Type *&Ty) {
+ unsigned &Opcode,
+ VectorType *&Ty) {
if (!EnableReduxCost)
return RK_None;
@@ -1076,7 +1079,7 @@ getShuffleAndOtherOprd(Value *L, Value *R) {
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
- unsigned &Opcode, Type *&Ty) {
+ unsigned &Opcode, VectorType *&Ty) {
if (!EnableReduxCost)
return RK_None;
@@ -1249,7 +1252,7 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
// Try to match a reduction sequence (series of shufflevector and vector
// adds followed by a extractelement).
unsigned ReduxOpCode;
- Type *ReduxType;
+ VectorType *ReduxType;
switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
case RK_Arithmetic:
@@ -1257,11 +1260,11 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
/*IsPairwiseForm=*/false);
case RK_MinMax:
return getMinMaxReductionCost(
- ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
case RK_UnsignedMinMax:
return getMinMaxReductionCost(
- ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
case RK_None:
break;
@@ -1273,11 +1276,11 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
/*IsPairwiseForm=*/true);
case RK_MinMax:
return getMinMaxReductionCost(
- ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
case RK_UnsignedMinMax:
return getMinMaxReductionCost(
- ReduxType, CmpInst::makeCmpResultType(ReduxType),
+ ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
/*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
case RK_None:
break;
@@ -1298,8 +1301,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return 0; // Model all ExtractValue nodes as free.
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- Type *Ty = Shuffle->getType();
- Type *SrcTy = Shuffle->getOperand(0)->getType();
+ auto *Ty = cast<VectorType>(Shuffle->getType());
+ auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
// TODO: Identify and add costs for insert subvector, etc.
int SubIndex;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index cd8b71767997..e67b0e671908 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -930,7 +930,8 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
return false;
}
-int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
+int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
+ VectorType *ValTy,
bool IsPairwiseForm) {
if (IsPairwiseForm)
@@ -958,8 +959,8 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
}
-int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
static const CostTblEntry ShuffleTbl[] = {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a47f87a7bbcf..b676f63fe0a8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -224,10 +224,11 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
/// @}
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index d9f004af30dc..402f55722ac1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -620,8 +620,8 @@ unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
}
}
-int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) {
+int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwise) {
EVT OrigTy = TLI->getValueType(DL, Ty);
// Computes cost on targets that have packed math instructions(which support
@@ -635,7 +635,7 @@ int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
return LT.first * getFullRateInstrCost();
}
-int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
+int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwise,
bool IsUnsigned) {
EVT OrigTy = TLI->getValueType(DL, Ty);
@@ -899,10 +899,9 @@ bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace(
}
}
-unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT,
+ int Index, VectorType *SubTp) {
if (ST->hasVOP3PInsts()) {
- VectorType *VT = cast<VectorType>(Tp);
if (VT->getNumElements() == 2 &&
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
// With op_sel VOP3P instructions freely can access the low half or high
@@ -919,7 +918,7 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
}
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, VT, Index, SubTp);
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
@@ -986,8 +985,8 @@ unsigned GCNTTIImpl::getUserCost(const User *U,
}
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- Type *Ty = Shuffle->getType();
- Type *SrcTy = Shuffle->getOperand(0)->getType();
+ auto *Ty = cast<VectorType>(Shuffle->getType());
+ auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
// TODO: Identify and add costs for insert subvector, etc.
int SubIndex;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 1313e2f8f576..a009837b66ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -215,8 +215,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
unsigned getVectorSplitCost() { return 0; }
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
@@ -226,7 +226,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
int getInlinerVectorBonusPercent() { return 0; }
int getArithmeticReductionCost(unsigned Opcode,
- Type *Ty,
+ VectorType *Ty,
bool IsPairwise);
template <typename T>
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
@@ -239,7 +239,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1, const Instruction *I = nullptr);
- int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsPairwiseForm,
bool IsUnsigned);
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1c845669fed5..34d819696e1a 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -587,8 +587,8 @@ int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
return LibCallCost;
}
-int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry NEONDupTbl[] = {
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index f21b348b6476..5f7d17d75d71 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -164,7 +164,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
int getMemcpyCost(const Instruction *I);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index b3fcbfbf3e63..d597dcf79517 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -526,9 +526,8 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
- assert (Tp->isVectorTy());
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+ int Index, VectorType *SubTp) {
if (ST->hasVector()) {
unsigned NumVectors = getNumVectorRegs(Tp);
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index c6e3b36bd98e..b4dc53bdc70a 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -81,7 +81,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 4530a6a0ab46..79458eb01856 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -925,12 +925,11 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
-int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
- Type *SubTp) {
- auto *Tp = cast<VectorType>(BaseTp);
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
+ int Index, VectorType *SubTp) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
// Treat Transpose as 2-op shuffles - there's no difference in lowering.
if (Kind == TTI::SK_Transpose)
@@ -965,13 +964,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
LT.second.getVectorElementType() ==
SubLT.second.getVectorElementType() &&
LT.second.getVectorElementType().getSizeInBits() ==
- Tp->getElementType()->getPrimitiveSizeInBits()) {
+ BaseTp->getElementType()->getPrimitiveSizeInBits()) {
assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
"Unexpected number of elements!");
- Type *VecTy = VectorType::get(Tp->getElementType(),
- LT.second.getVectorNumElements());
- Type *SubTy = VectorType::get(Tp->getElementType(),
- SubLT.second.getVectorNumElements());
+ VectorType *VecTy = VectorType::get(BaseTp->getElementType(),
+ LT.second.getVectorNumElements());
+ VectorType *SubTy =
+ VectorType::get(BaseTp->getElementType(),
+ SubLT.second.getVectorNumElements());
int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
ExtractIndex, SubTy);
@@ -991,7 +991,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
// Handle some common (illegal) sub-vector types as they are often very cheap
// to shuffle even on targets without PSHUFB.
- EVT VT = TLI->getValueType(DL, Tp);
+ EVT VT = TLI->getValueType(DL, BaseTp);
if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 &&
!ST->hasSSSE3()) {
static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
@@ -1032,25 +1032,26 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
MVT LegalVT = LT.second;
if (LegalVT.isVector() &&
LegalVT.getVectorElementType().getSizeInBits() ==
- Tp->getElementType()->getPrimitiveSizeInBits() &&
- LegalVT.getVectorNumElements() < Tp->getNumElements()) {
+ BaseTp->getElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() < BaseTp->getNumElements()) {
- unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned VecTySize = DL.getTypeStoreSize(BaseTp);
unsigned LegalVTSize = LegalVT.getStoreSize();
// Number of source vectors after legalization:
unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Number of destination vectors after legalization:
unsigned NumOfDests = LT.first;
- Type *SingleOpTy =
- VectorType::get(Tp->getElementType(), LegalVT.getVectorNumElements());
+ VectorType *SingleOpTy =
+ VectorType::get(BaseTp->getElementType(),
+ LegalVT.getVectorNumElements());
unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles *
getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
}
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, BaseTp, Index, SubTp);
}
// For 2-input shuffles, we must account for splitting the 2 inputs into many.
@@ -1352,7 +1353,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index,
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, BaseTp, Index, SubTp);
}
int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -2648,7 +2649,7 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// TODO: Under what circumstances should we shuffle using the full width?
int ShuffleCost = 1;
if (Opcode == Instruction::InsertElement) {
- Type *SubTy = Val;
+ auto *SubTy = cast<VectorType>(Val);
EVT VT = TLI->getValueType(DL, Val);
if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128)
SubTy = VectorType::get(ScalarType, SubNumElts);
@@ -2796,7 +2797,7 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
-int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
+int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
bool IsPairwise) {
// Just use the default implementation for pair reductions.
if (IsPairwise)
@@ -2868,7 +2869,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (LT.first != 1 && MTy.isVector() &&
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
// Type needs to be split. We need LT.first - 1 arithmetic ops.
- Type *SingleOpTy =
+ VectorType *SingleOpTy =
VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
ArithmeticCost *= LT.first - 1;
@@ -3150,7 +3151,7 @@ int X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned) {
getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr);
}
-int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
+int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
bool IsPairwise, bool IsUnsigned) {
// Just use the default implementation for pair reductions.
if (IsPairwise)
@@ -3241,7 +3242,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
// Type needs to be split. We need LT.first - 1 operations ops.
Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
- Type *SubCondTy = VectorType::get(
+ auto *SubCondTy = VectorType::get(
cast<VectorType>(CondTy)->getElementType(), MTy.getVectorNumElements());
MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
MinMaxCost *= LT.first - 1;
@@ -3286,7 +3287,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
Ty = SubTy;
} else if (Size == 128) {
// Reducing from 128 bits is a permute of v2f64/v2i64.
- Type *ShufTy;
+ VectorType *ShufTy;
if (ValTy->isFloatingPointTy())
ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
else
@@ -3295,7 +3296,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
} else if (Size == 64) {
// Reducing from 64 bits is a shuffle of v4f32/v4i32.
- Type *ShufTy;
+ VectorType *ShufTy;
if (ValTy->isFloatingPointTy())
ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
else
@@ -3304,7 +3305,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
} else {
// Reducing from smaller size is a shift by immediate.
- Type *ShiftTy = VectorType::get(
+ VectorType *ShiftTy = VectorType::get(
Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
MinMaxCost += getArithmeticInstrCost(
Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
@@ -3313,8 +3314,8 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
}
// Add the arithmetic op for this level.
- auto *SubCondTy = VectorType::get(
- cast<VectorType>(CondTy)->getElementType(), Ty->getNumElements());
+ auto *SubCondTy = VectorType::get(CondTy->getElementType(),
+ Ty->getNumElements());
MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
}
@@ -4036,7 +4037,7 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
- Type *SingleMemOpTy =
+ auto *SingleMemOpTy =
VectorType::get(cast<VectorType>(VecTy)->getElementType(),
LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 476a1bfe87df..1b6f741ed4de 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -125,7 +125,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I = nullptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
@@ -152,13 +153,13 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1, const Instruction *I = nullptr);
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
bool IsPairwiseForm);
int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
- int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
- bool IsUnsigned);
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwiseForm, bool IsUnsigned);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 27e3c93f7af8..503389ec50a0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5857,7 +5857,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
- Type *VectorTy = ToVectorTy(ValTy, VF);
+ auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
@@ -5881,7 +5881,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
- Type *VectorTy = ToVectorTy(ValTy, VF);
+ auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
const MaybeAlign Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
if (isa<LoadInst>(I)) {
@@ -5903,7 +5903,7 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
- Type *VectorTy = ToVectorTy(ValTy, VF);
+ auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
const MaybeAlign Alignment = getLoadStoreAlignment(I);
Value *Ptr = getLoadStorePointerOperand(I);
@@ -5916,14 +5916,14 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
unsigned VF) {
Type *ValTy = getMemInstValueType(I);
- Type *VectorTy = ToVectorTy(ValTy, VF);
+ auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
unsigned AS = getLoadStoreAddressSpace(I);
auto Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
unsigned InterleaveFactor = Group->getFactor();
- Type *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+ VectorType *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
// Holds the indices of existing members in an interleaved load group.
// An interleaved store group doesn't need this as it doesn't allow gaps.
@@ -6227,7 +6227,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- VectorTy, VF - 1, VectorType::get(RetTy, 1));
+ cast<VectorType>(VectorTy), VF - 1,
+ VectorType::get(RetTy, 1));
// Phi nodes in non-header blocks (not inductions, reductions, etc.) are
// converted into select instructions. We require N - 1 selects per phi
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fabe3e06bfeb..00560ba010a9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1400,7 +1400,8 @@ class BoUpSLP {
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
- int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices) const;
+ int getGatherCost(VectorType *Ty,
+ const DenseSet<unsigned> &ShuffledIndices) const;
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
@@ -3871,10 +3872,10 @@ int BoUpSLP::getTreeCost() {
return Cost;
}
-int BoUpSLP::getGatherCost(Type *Ty,
+int BoUpSLP::getGatherCost(VectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices) const {
int Cost = 0;
- for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+ for (unsigned i = 0, e = Ty->getNumElements(); i < e; ++i)
if (!ShuffledIndices.count(i))
Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
if (!ShuffledIndices.empty())
@@ -6890,7 +6891,7 @@ class HorizontalReduction {
int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal,
unsigned ReduxWidth) {
Type *ScalarTy = FirstReducedVal->getType();
- Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
+ VectorType *VecTy = VectorType::get(ScalarTy, ReduxWidth);
int PairwiseRdxCost;
int SplittingRdxCost;
@@ -6907,7 +6908,7 @@ class HorizontalReduction {
case RK_Max:
case RK_UMin:
case RK_UMax: {
- Type *VecCondTy = CmpInst::makeCmpResultType(VecTy);
+ auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
bool IsUnsigned = ReductionData.getKind() == RK_UMin ||
ReductionData.getKind() == RK_UMax;
PairwiseRdxCost =
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b473d946b7ea..02f42a1b4fe6 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -58,7 +58,7 @@ static bool isExtractExtractCheap(Instruction *Ext0, Instruction *Ext1,
isa<ConstantInt>(Ext1->getOperand(1)) &&
"Expected constant extract indexes");
Type *ScalarTy = Ext0->getType();
- Type *VecTy = Ext0->getOperand(0)->getType();
+ auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
int ScalarOpCost, VectorOpCost;
// Get cost estimates for scalar and vector versions of the operation.
More information about the llvm-commits mailing list