[llvm] ab35ba5 - [SVE] Remove calls to VectorType::getNumElements from AArch64
Christopher Tetreault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 30 11:18:05 PDT 2020
Author: Christopher Tetreault
Date: 2020-06-30T11:17:50-07:00
New Revision: ab35ba5742216b3aea8421a66b480405377bf8f1
URL: https://github.com/llvm/llvm-project/commit/ab35ba5742216b3aea8421a66b480405377bf8f1
DIFF: https://github.com/llvm/llvm-project/commit/ab35ba5742216b3aea8421a66b480405377bf8f1.diff
LOG: [SVE] Remove calls to VectorType::getNumElements from AArch64
Reviewers: efriedma, paquette, david-arm, kmclaughlin
Reviewed By: david-arm
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, danielkiss, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D82214
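
The recurring pattern in this patch: wherever a vector's element count is read, the unqualified VectorType cast is replaced with an explicit FixedVectorType cast, so fixed-width assumptions fail loudly on SVE's scalable vectors instead of silently operating on the minimum element count. A minimal before/after sketch of the pattern (illustrative only, not code taken from this patch):

    // Before: for a scalable vector (<vscale x 4 x i32>), getNumElements()
    // reflects only the minimum element count, so downstream arithmetic
    // silently bakes in a fixed-width assumption.
    unsigned NumElts = cast<VectorType>(Ty)->getNumElements();

    // After: the cast states the fixed-width assumption up front; handing
    // in a scalable vector trips the cast assertion instead of miscomputing.
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();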
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64StackTagging.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2a632afd3431..0656a8e84ed9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9486,8 +9486,8 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
- auto *FullVT = cast<VectorType>(FullV->getType());
- auto *HalfVT = cast<VectorType>(HalfV->getType());
+ auto *FullVT = cast<FixedVectorType>(FullV->getType());
+ auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
@@ -9507,7 +9507,7 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
// elements.
int M1Start = -1;
int M2Start = -1;
- int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+ int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
@@ -9639,7 +9639,7 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the number of vector elements is greater than 1.
- if (VecTy->getNumElements() < 2)
+ if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
return false;
// Ensure the element type is legal.
@@ -9673,22 +9673,24 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
- VectorType *VecTy = Shuffles[0]->getType();
+ VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
return false;
- unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+ unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
+
+ auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
- Type *EltTy = VecTy->getElementType();
+ Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
- VecTy =
- FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy->getNumElements());
+ FVTy =
+ FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
IRBuilder<> Builder(LI);
@@ -9698,19 +9700,19 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
- VecTy = FixedVectorType::get(VecTy->getElementType(),
- VecTy->getNumElements() / NumLoads);
+ FVTy = FixedVectorType::get(FVTy->getElementType(),
+ FVTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
- VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
+ FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
- Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
- Type *Tys[2] = {VecTy, PtrTy};
+ Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *Tys[2] = {FVTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
@@ -9727,8 +9729,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
- VecTy->getNumElements() * Factor);
+ BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+ FVTy->getNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
@@ -9744,7 +9746,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
- VecTy->getNumElements()));
+ FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
@@ -9795,7 +9797,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
- VectorType *VecTy = SVI->getType();
+ auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
@@ -9820,7 +9822,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
- unsigned NumOpElts = cast<VectorType>(Op0->getType())->getNumElements();
+ unsigned NumOpElts =
+ cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 81f669a7cb22..61f27cbc3b29 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -265,8 +265,9 @@ class InitializerBuilder {
Type *EltTy = VecTy->getElementType();
if (EltTy->isPointerTy()) {
uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
- auto *NewTy = FixedVectorType::get(IntegerType::get(Ctx, EltSize),
- VecTy->getNumElements());
+ auto *NewTy = FixedVectorType::get(
+ IntegerType::get(Ctx, EltSize),
+ cast<FixedVectorType>(VecTy)->getNumElements());
V = IRB.CreatePointerCast(V, NewTy);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 41aca3049e8c..0fa2748ebe7c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -212,7 +212,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// elements in type Ty determine the vector width.
auto toVectorTy = [&](Type *ArgTy) {
return FixedVectorType::get(ArgTy->getScalarType(),
- cast<VectorType>(DstTy)->getNumElements());
+ cast<FixedVectorType>(DstTy)->getNumElements());
};
// Exit early if DstTy is not a vector type whose elements are at least
@@ -724,8 +724,8 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
// have to promote the elements to v.2.
ProfitableNumElements = 8;
- if (cast<VectorType>(Ty)->getNumElements() < ProfitableNumElements) {
- unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+ if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
+ unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
// We generate 2 instructions per vector element.
return NumVectorizableInstsToAmortize * NumVecElts * 2;
@@ -740,7 +740,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
- auto *VecVTy = cast<VectorType>(VecTy);
+ auto *VecVTy = cast<FixedVectorType>(VecTy);
if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
@@ -767,7 +767,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
- if (I->getScalarSizeInBits() * cast<VectorType>(I)->getNumElements() == 128)
+ if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
+ 128)
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
}
@@ -970,9 +971,10 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
case Instruction::Mul:
return false;
case Instruction::Add:
- return ScalarBits * VTy->getNumElements() >= 128;
+ return ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128;
case Instruction::ICmp:
- return (ScalarBits < 64) && (ScalarBits * VTy->getNumElements() >= 128);
+ return (ScalarBits < 64) &&
+ (ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128);
case Instruction::FCmp:
return Flags.NoNaN;
default:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index ecd9d8194492..55d4c01ee1e3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -189,7 +189,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
// the element type fits into a register and the number of elements is a
// power of 2 > 1.
if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
- unsigned NumElements = DataTypeVTy->getNumElements();
+ unsigned NumElements =
+ cast<FixedVectorType>(DataTypeVTy)->getNumElements();
unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
EltSize <= 128 && isPowerOf2_64(EltSize);
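
For call sites that may legitimately be handed a scalable vector, a dyn_cast-based guard is the usual alternative to a hard cast. A hypothetical helper, sketched under the assumption that callers treat 0 as "no fixed element count" (not part of this patch):

    #include "llvm/IR/DerivedTypes.h"

    // Returns the fixed element count of Ty, or 0 if Ty is a scalable
    // vector or not a vector type at all.
    static unsigned getFixedNumElementsOrZero(llvm::Type *Ty) {
      if (auto *FVTy = llvm::dyn_cast<llvm::FixedVectorType>(Ty))
        return FVTy->getNumElements();
      return 0;
    }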