[llvm] d2befc6 - [SVE] Eliminate calls to default-false VectorType::get() from Vectorize
Christopher Tetreault via llvm-commits
llvm-commits at lists.llvm.org
Fri May 29 11:31:43 PDT 2020
Author: Christopher Tetreault
Date: 2020-05-29T11:31:24-07:00
New Revision: d2befc66336d4d4c014be11d40e8ed6d3140fd36
URL: https://github.com/llvm/llvm-project/commit/d2befc66336d4d4c014be11d40e8ed6d3140fd36
DIFF: https://github.com/llvm/llvm-project/commit/d2befc66336d4d4c014be11d40e8ed6d3140fd36.diff
LOG: [SVE] Eliminate calls to default-false VectorType::get() from Vectorize
Reviewers: efriedma, c-rhodes, david-arm, fhahn
Reviewed By: david-arm
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80339
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index c02b8f889500..4885dd4351d5 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1030,10 +1030,10 @@ bool Vectorizer::vectorizeStoreChain(
VectorType *VecTy;
VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
if (VecStoreTy)
- VecTy = VectorType::get(StoreTy->getScalarType(),
- Chain.size() * VecStoreTy->getNumElements());
+ VecTy = FixedVectorType::get(StoreTy->getScalarType(),
+ Chain.size() * VecStoreTy->getNumElements());
else
- VecTy = VectorType::get(StoreTy, Chain.size());
+ VecTy = FixedVectorType::get(StoreTy, Chain.size());
// If it's more than the max vector size or the target has a better
// vector factor, break it into two pieces.
@@ -1182,10 +1182,10 @@ bool Vectorizer::vectorizeLoadChain(
VectorType *VecTy;
VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
if (VecLoadTy)
- VecTy = VectorType::get(LoadTy->getScalarType(),
- Chain.size() * VecLoadTy->getNumElements());
+ VecTy = FixedVectorType::get(LoadTy->getScalarType(),
+ Chain.size() * VecLoadTy->getNumElements());
else
- VecTy = VectorType::get(LoadTy, Chain.size());
+ VecTy = FixedVectorType::get(LoadTy, Chain.size());
// If it's more than the max vector size or the target has a better
// vector factor, break it into two pieces.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index bf19405cd4ee..eb8709a9d63f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -767,7 +767,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// supported on the target.
if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
// Arbitrarily try a vector of 2 elements.
- Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+ auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of stored type");
if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
reportVectorizationFailure(
@@ -782,7 +782,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
// For nontemporal loads, check that a nontemporal vector version is
// supported on the target (arbitrarily try a vector of 2 elements).
- Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+ auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
assert(VecTy && "did not find vectorized version of load type");
if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
reportVectorizationFailure(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8d52ddc5b3b5..5e5f029578f0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -311,7 +311,7 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
// Determine if an array of VF elements of type Ty is "bitcast compatible"
// with a <VF x Ty> vector.
if (VF > 1) {
- auto *VectorTy = VectorType::get(Ty, VF);
+ auto *VectorTy = FixedVectorType::get(Ty, VF);
return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy);
}
@@ -2074,7 +2074,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
} else {
// Initialize packing with insertelements to start from undef.
- Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
+ Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), VF));
VectorLoopValueMap.setVectorValue(V, Part, Undef);
for (unsigned Lane = 0; Lane < VF; ++Lane)
packScalarIntoVectorValue(V, {Part, Lane});
@@ -2196,7 +2196,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
// Prepare for the vector type of the interleaved load/store.
Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
- Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
+ auto *VecTy = FixedVectorType::get(ScalarTy, InterleaveFactor * VF);
// Prepare for the new pointers.
SmallVector<Value *, 2> AddrParts;
@@ -2300,7 +2300,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {
- VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
+ VectorType *OtherVTy = FixedVectorType::get(Member->getType(), VF);
StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
}
@@ -2314,7 +2314,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
// The sub vector type for current instruction.
- VectorType *SubVT = VectorType::get(ScalarTy, VF);
+ auto *SubVT = FixedVectorType::get(ScalarTy, VF);
// Vectorize the interleaved store group.
for (unsigned Part = 0; Part < UF; Part++) {
@@ -2385,7 +2385,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
"CM decision is not to widen the memory instruction");
Type *ScalarDataTy = getMemInstValueType(Instr);
- Type *DataTy = VectorType::get(ScalarDataTy, VF);
+ auto *DataTy = FixedVectorType::get(ScalarDataTy, VF);
const Align Alignment = getLoadStoreAlignment(Instr);
// Determine if the pointer operand of the access is either consecutive or
@@ -2688,7 +2688,7 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
"Only one type should be a floating point type");
Type *IntTy =
IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
- VectorType *VecIntTy = VectorType::get(IntTy, VF);
+ auto *VecIntTy = FixedVectorType::get(IntTy, VF);
Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
}
@@ -3359,7 +3359,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
Type *OriginalTy = I->getType();
Type *ScalarTruncatedTy =
IntegerType::get(OriginalTy->getContext(), KV.second);
- Type *TruncatedTy = VectorType::get(
+ auto *TruncatedTy = FixedVectorType::get(
ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());
if (TruncatedTy == OriginalTy)
continue;
@@ -3413,11 +3413,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
auto Elements0 =
cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(
- SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0));
+ SI->getOperand(0),
+ FixedVectorType::get(ScalarTruncatedTy, Elements0));
auto Elements1 =
cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();
auto *O1 = B.CreateZExtOrTrunc(
- SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1));
+ SI->getOperand(1),
+ FixedVectorType::get(ScalarTruncatedTy, Elements1));
NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());
} else if (isa<LoadInst>(I) || isa<PHINode>(I)) {
@@ -3427,14 +3429,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
auto Elements =
cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(
- IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
+ IE->getOperand(0),
+ FixedVectorType::get(ScalarTruncatedTy, Elements));
auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
} else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
auto Elements =
cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();
auto *O0 = B.CreateZExtOrTrunc(
- EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
+ EE->getOperand(0),
+ FixedVectorType::get(ScalarTruncatedTy, Elements));
NewI = B.CreateExtractElement(O0, EE->getOperand(2));
} else {
// If we don't know what to do, be conservative and don't do anything.
@@ -3598,8 +3602,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
if (VF > 1) {
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
VectorInit = Builder.CreateInsertElement(
- UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
- Builder.getInt32(VF - 1), "vector.recur.init");
+ UndefValue::get(FixedVectorType::get(VectorInit->getType(), VF)),
+ VectorInit, Builder.getInt32(VF - 1), "vector.recur.init");
}
// We constructed a temporary phi node in the first phase of vectorization.
@@ -3821,7 +3825,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
- Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+ Type *RdxVecTy = FixedVectorType::get(RdxDesc.getRecurrenceType(), VF);
Builder.SetInsertPoint(
LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
VectorParts RdxParts(UF);
@@ -4148,7 +4152,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
// Create a vector phi with no operands - the vector phi operands will be
// set at the end of vector code generation.
Type *VecTy =
- (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+ (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
VectorLoopValueMap.setVectorValue(P, 0, VecPhi);
OrigPHIsToFix.push_back(P);
@@ -4167,7 +4171,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
for (unsigned Part = 0; Part < UF; ++Part) {
// This is phase one of vectorizing PHIs.
Type *VecTy =
- (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+ (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
Value *EntryPart = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
@@ -4327,7 +4331,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
/// Vectorize casts.
Type *DestTy =
- (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
+ (VF == 1) ? CI->getType() : FixedVectorType::get(CI->getType(), VF);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *A = State.get(User.getOperand(0), Part);
@@ -4387,7 +4391,8 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
// Use vector version of the intrinsic.
Type *TysForDecl[] = {CI->getType()};
if (VF > 1)
- TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ TysForDecl[0] =
+ FixedVectorType::get(CI->getType()->getScalarType(), VF);
VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
} else {
@@ -5947,7 +5952,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
assert(Group && "Fail to get an interleaved access group.");
unsigned InterleaveFactor = Group->getFactor();
- VectorType *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+ auto *WideVecTy = FixedVectorType::get(ValTy, VF * InterleaveFactor);
// Holds the indices of existing members in an interleaved load group.
// An interleaved store group doesn't need this as it doesn't allow gaps.
@@ -6349,7 +6354,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
if (!ScalarCond)
- CondTy = VectorType::get(CondTy, VF);
+ CondTy = FixedVectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
CostKind, I);
@@ -7510,8 +7515,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
if (AlsoPack && State.VF > 1) {
// If we're constructing lane 0, initialize to start from undef.
if (State.Instance->Lane == 0) {
- Value *Undef =
- UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
+ Value *Undef = UndefValue::get(
+ FixedVectorType::get(Ingredient->getType(), State.VF));
State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
}
State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1657b9e90115..4c18fab4ec09 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3165,7 +3165,7 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
if (!isValidElementType(EltTy))
return 0;
- uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N));
+ uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
return 0;
return N;
@@ -3265,7 +3265,7 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
SmallVector<Type *, 4> VecTys;
for (Use &Arg : CI->args())
VecTys.push_back(
- VectorType::get(Arg->getType(), VecTy->getNumElements()));
+ FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
// If the corresponding vector call is cheaper, return its cost.
LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
@@ -3425,7 +3425,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Calculate the cost of this instruction.
int ScalarCost = VL.size() * ScalarEltCost;
- VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+ auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
int VecCost = 0;
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
@@ -3445,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
if (NeedToShuffleReuses) {
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
}
- VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
+ auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CostKind, VL0);
@@ -3633,8 +3633,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
- VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
- VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
+ auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
+ auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
CostKind);
VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
@@ -3807,7 +3807,7 @@ int BoUpSLP::getSpillCost() const {
if (NumCalls) {
SmallVector<Type*, 4> V;
for (auto *II : LiveValues)
- V.push_back(VectorType::get(II->getType(), BundleWidth));
+ V.push_back(FixedVectorType::get(II->getType(), BundleWidth));
Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
}
@@ -4100,7 +4100,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
else
VL = UniqueValues;
}
- VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
Value *V = Gather(VL, VecTy);
if (!ReuseShuffleIndicies.empty()) {
@@ -4135,7 +4135,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Type *ScalarTy = VL0->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
ScalarTy = SI->getValueOperand()->getType();
- VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
@@ -4532,7 +4532,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Module *M = F->getParent();
- Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
+ Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
if (!UseIntrinsic) {
@@ -4660,7 +4660,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
auto BundleWidth = VectorizableTree[0]->Scalars.size();
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
- auto *VecTy = VectorType::get(MinTy, BundleWidth);
+ auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
VectorizableTree[0]->VectorizedValue = Trunc;
}
@@ -5988,7 +5988,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// No actual vectorization should happen, if number of parts is the same as
// provided vectorization factor (i.e. the scalar type is used for vector
// code during codegen).
- auto *VecTy = VectorType::get(VL[0]->getType(), VF);
+ auto *VecTy = FixedVectorType::get(VL[0]->getType(), VF);
if (TTI->getNumberOfParts(VecTy) == VF)
continue;
for (unsigned I = NextInst; I < MaxInst; ++I) {
More information about the llvm-commits
mailing list