[llvm] 5a99ec1 - [SVE] Eliminate calls to default-false VectorType::get() from X86
Christopher Tetreault via llvm-commits
llvm-commits at lists.llvm.org
Fri May 29 16:16:24 PDT 2020
Author: Christopher Tetreault
Date: 2020-05-29T16:16:07-07:00
New Revision: 5a99ec10f5df7fa351e81b9bc90bf38e670653ae
URL: https://github.com/llvm/llvm-project/commit/5a99ec10f5df7fa351e81b9bc90bf38e670653ae
DIFF: https://github.com/llvm/llvm-project/commit/5a99ec10f5df7fa351e81b9bc90bf38e670653ae.diff
LOG: [SVE] Eliminate calls to default-false VectorType::get() from X86
Reviewers: efriedma, sdesmalen, c-rhodes, craig.topper
Reviewed By: craig.topper
Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D80331
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InterleavedAccess.cpp
llvm/lib/Target/X86/X86PartialReduction.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3d2cdccd50a5..0b114b34186d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28632,7 +28632,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
- : (Type *)VectorType::get(ArgTy, 4);
+ : (Type *)FixedVectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2632cb8a745a..c8939e348a70 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5999,14 +5999,18 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 8);
else if (Opc == X86::MMX_SET0)
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 2);
else
- Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);
+ Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
+ 4);
bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
Opc == X86::AVX512_512_SETALLONES ||
diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 72a37a9ddeb9..00ac238f284b 100644
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -201,7 +201,7 @@ void X86InterleavedAccessGroup::decompose(
// [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
if (VecLength == 768 || VecLength == 1536) {
- VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
+ VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16);
VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
NumLoads = NumSubVectors * (VecLength / 384);
@@ -768,7 +768,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
// Lower the interleaved stores:
// 1. Decompose the interleaved wide shuffle into individual shuffle
// vectors.
- decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems),
+ decompose(Shuffles[0], Factor,
+ FixedVectorType::get(ShuffleEltTy, NumSubVecElems),
DecomposedVectors);
// 2. Transpose the interleaved-vectors into vectors of contiguous
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index 4b3ba2044409..16108bd1928f 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -372,7 +372,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) {
}
// Intrinsics produce vXi64 and need to be casted to vXi32.
- Type *I32Ty = VectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
+ auto *I32Ty =
+ FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
unsigned NumSplits = NumElts / IntrinsicNumElts;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 6bfcadeaf8b6..5199bfc829ef 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3164,8 +3164,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
if (LT.first != 1 && MTy.isVector() &&
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
// Type needs to be split. We need LT.first - 1 arithmetic ops.
- VectorType *SingleOpTy =
- VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
+ auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
+ MTy.getVectorNumElements());
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
ArithmeticCost *= LT.first - 1;
}
@@ -3234,8 +3234,8 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
if (LT.first != 1 && MTy.isVector() &&
MTy.getVectorNumElements() < ValVTy->getNumElements()) {
// Type needs to be split. We need LT.first - 1 arithmetic ops.
- Type *SingleOpTy =
- VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
+ auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(),
+ MTy.getVectorNumElements());
ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind);
ArithmeticCost *= LT.first - 1;
}
@@ -3310,7 +3310,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
} else {
// Reducing from smaller size is a shift by immediate.
- auto *ShiftTy = VectorType::get(
+ auto *ShiftTy = FixedVectorType::get(
Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size);
ReductionCost += getArithmeticInstrCost(
Instruction::LShr, ShiftTy, CostKind,
@@ -3617,8 +3617,8 @@ int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
}
// Add the arithmetic op for this level.
- auto *SubCondTy = VectorType::get(CondTy->getElementType(),
- Ty->getNumElements());
+ auto *SubCondTy =
+ FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements());
MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
}
@@ -3866,14 +3866,15 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
? getIndexSizeInBits(Ptr, DL)
: DL.getPointerSizeInBits();
- Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
- IndexSize), VF);
+ auto *IndexVTy = FixedVectorType::get(
+ IntegerType::get(SrcVTy->getContext(), IndexSize), VF);
std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
if (SplitFactor > 1) {
// Handle splitting of vector of pointers
- Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
+ auto *SplitSrcTy =
+ FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
AddressSpace);
}
@@ -4265,14 +4266,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
// Get the cost of one memory operation.
- Type *SingleMemOpTy =
- VectorType::get(cast<VectorType>(VecTy)->getElementType(),
- LegalVT.getVectorNumElements());
+ auto *SingleMemOpTy =
+ FixedVectorType::get(cast<VectorType>(VecTy)->getElementType(),
+ LegalVT.getVectorNumElements());
unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
MaybeAlign(Alignment), AddressSpace,
CostKind);
- VectorType *VT = VectorType::get(ScalarTy, VF);
+ auto *VT = FixedVectorType::get(ScalarTy, VF);
EVT ETy = TLI->getValueType(DL, VT);
if (!ETy.isSimple())
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -4408,9 +4409,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
unsigned NumOfLoadsInInterleaveGrp =
Indices.size() ? Indices.size() : Factor;
- Type *ResultTy =
- VectorType::get(cast<VectorType>(VecTy)->getElementType(),
- cast<VectorType>(VecTy)->getNumElements() / Factor);
+ auto *ResultTy = FixedVectorType::get(
+ cast<VectorType>(VecTy)->getElementType(),
+ cast<VectorType>(VecTy)->getNumElements() / Factor);
unsigned NumOfResults =
getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
NumOfLoadsInInterleaveGrp;
More information about the llvm-commits mailing list