[llvm] r330065 - [Hexagon] Initial instruction cost model for auto-vectorization
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 13 13:46:50 PDT 2018
Author: kparzysz
Date: Fri Apr 13 13:46:50 2018
New Revision: 330065
URL: http://llvm.org/viewvc/llvm-project?rev=330065&view=rev
Log:
[Hexagon] Initial instruction cost model for auto-vectorization
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp?rev=330065&r1=330064&r2=330065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp Fri Apr 13 13:46:50 2018
@@ -16,6 +16,7 @@
#include "HexagonTargetTransformInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
@@ -27,16 +28,35 @@ using namespace llvm;
#define DEBUG_TYPE "hexagontti"
-static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(true),
cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
cl::init(true), cl::Hidden,
cl::desc("Control lookup table emission on Hexagon target"));
+
+bool HexagonTTIImpl::useHVX() const {
+ return ST.useHVXOps() && HexagonAutoHVX;
+}
+
+bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
+ assert(VecTy->isVectorTy());
+ // Avoid types like <2 x i32*>.
+ if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
+ return false;
+ EVT VecVT = EVT::getEVT(VecTy);
+ if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
+ return false;
+ if (ST.isHVXVectorType(VecVT.getSimpleVT()))
+ return true;
+ auto Action = TLI.getPreferredVectorAction(VecVT);
+ return Action == TargetLoweringBase::TypeWidenVector;
+}
+
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
- // Return Fast Hardware support as every input < 64 bits will be promoted
+ // Return fast hardware support as every input < 64 bits will be promoted
// to 64 bits.
return TargetTransformInfo::PSK_FastHardware;
}
@@ -58,14 +78,16 @@ bool HexagonTTIImpl::shouldFavorPostInc(
return true;
}
+/// --- Vector TTI begin ---
+
unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
if (Vector)
- return HexagonAutoHVX && getST()->useHVXOps() ? 32 : 0;
+ return useHVX() ? 32 : 0;
return 32;
}
unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
- return HexagonAutoHVX && getST()->useHVXOps() ? 64 : 0;
+ return useHVX() ? 2 : 0;
}
unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
@@ -73,38 +95,161 @@ unsigned HexagonTTIImpl::getRegisterBitW
}
unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
- return getST()->useHVXOps() ? getST()->getVectorLength()*8 : 0;
+ return useHVX() ? ST.getVectorLength()*8 : 0;
}
unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
- return (8 * getST()->getVectorLength()) / ElemWidth;
+ return (8 * ST.getVectorLength()) / ElemWidth;
+}
+
+unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) {
+ return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
+ ArrayRef<const Value*> Args, unsigned VF) {
+ return BaseT::getOperandsScalarizationOverhead(Args, VF);
+}
+
+unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type*> Tys) {
+ return BaseT::getCallInstrCost(F, RetTy, Tys);
+}
+
+unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
+
+unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ if (ID == Intrinsic::bswap) {
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
+ return LT.first + 2;
+ }
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
+}
+
+unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
+ ScalarEvolution *SE, const SCEV *S) {
+ return 0;
}
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
- if (Opcode == Instruction::Load && Src->isVectorTy()) {
+ assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
+ if (Opcode == Instruction::Store)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+
+ if (Src->isVectorTy()) {
VectorType *VecTy = cast<VectorType>(Src);
unsigned VecWidth = VecTy->getBitWidth();
- if (VecWidth > 64) {
- // Assume that vectors longer than 64 bits are meant for HVX.
- if (getNumberOfRegisters(true) > 0) {
- if (VecWidth % getRegisterBitWidth(true) == 0)
- return 1;
- }
+ if (useHVX() && isTypeForHVX(VecTy)) {
+ unsigned RegWidth = getRegisterBitWidth(true);
+ Alignment = std::min(Alignment, RegWidth/8);
+ // Cost of HVX loads.
+ if (VecWidth % RegWidth == 0)
+ return VecWidth / RegWidth;
+ // Cost of constructing HVX vector from scalar loads.
unsigned AlignWidth = 8 * std::max(1u, Alignment);
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
return 3*NumLoads;
}
+
+ // Non-HVX vectors.
+ // Add extra cost for floating point types.
+ unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? 4 : 1;
+
+ Alignment = std::min(Alignment, 8u);
+ unsigned AlignWidth = 8 * std::max(1u, Alignment);
+ unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
+ if (Alignment == 4 || Alignment == 8)
+ return Cost * NumLoads;
+ // Loads of less than 32 bits will need extra inserts to compose a vector.
+ unsigned LogA = Log2_32(Alignment);
+ return (3 - LogA) * Cost * NumLoads;
}
+
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}
+unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
+ Type *Src, unsigned Alignment, unsigned AddressSpace) {
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) {
+ return 1;
+}
+
+unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask, unsigned Alignment) {
+ return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment);
+}
+
+unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
+ Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace) {
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
+
+unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy, const Instruction *I) {
+ if (ValTy->isVectorTy()) {
+ auto *VecTy = dyn_cast<VectorType>(ValTy);
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
+ if (Opcode == Instruction::FCmp)
+ return LT.first + 4 * VecTy->getNumElements();
+ }
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+}
+
+unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args);
+}
+
+unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src, const Instruction *I) {
+ return 1;
+}
+
+unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
+ Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
+ : Val;
+ if (Opcode == Instruction::InsertElement) {
+ // Need two rotations for non-zero index.
+ unsigned Cost = (Index != 0) ? 2 : 0;
+ if (ElemTy->isIntegerTy(32))
+ return Cost;
+ // If it's not a 32-bit value, there will need to be an extract.
+ return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
+ }
+
+ if (Opcode == Instruction::ExtractElement)
+ return 2;
+
+ return 1;
+}
+
+/// --- Vector TTI end ---
+
unsigned HexagonTTIImpl::getPrefetchDistance() const {
- return getST()->getL1PrefetchDistance();
+ return ST.getL1PrefetchDistance();
}
unsigned HexagonTTIImpl::getCacheLineSize() const {
- return getST()->getL1CacheLineSize();
+ return ST.getL1CacheLineSize();
}
int HexagonTTIImpl::getUserCost(const User *U,
Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h?rev=330065&r1=330064&r2=330065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h Fri Apr 13 13:46:50 2018
@@ -37,16 +37,19 @@ class HexagonTTIImpl : public BasicTTIIm
friend BaseT;
- const HexagonSubtarget *ST;
- const HexagonTargetLowering *TLI;
+ const HexagonSubtarget &ST;
+ const HexagonTargetLowering &TLI;
- const HexagonSubtarget *getST() const { return ST; }
- const HexagonTargetLowering *getTLI() const { return TLI; }
+ const HexagonSubtarget *getST() const { return &ST; }
+ const HexagonTargetLowering *getTLI() const { return &TLI; }
+
+ bool useHVX() const;
+ bool isTypeForHVX(Type *VecTy) const;
public:
explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
- TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(*TM->getSubtargetImpl(F)), TLI(*ST.getTargetLowering()) {}
/// \name Scalar TTI Implementations
/// @{
@@ -73,110 +76,59 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
- bool shouldMaximizeVectorBandwidth(bool OptSize) const { return true; }
unsigned getMinimumVF(unsigned ElemWidth) const;
+ bool shouldMaximizeVectorBandwidth(bool OptSize) const {
+ return true;
+ }
bool supportsEfficientVectorElementLoadStore() {
return false;
}
-
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
- return 0;
- }
-
- unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
- unsigned VF) {
- return 0;
- }
-
- unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys) {
- return 1;
- }
-
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
- return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
- }
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX) {
- return 1;
- }
-
bool hasBranchDivergence() {
return false;
}
-
bool enableAggressiveInterleaving(bool LoopHasReductions) {
return false;
}
-
- unsigned getCFInstrCost(unsigned Opcode) {
- return 1;
- }
-
- unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
- const SCEV *) {
- return 0;
+ bool prefersVectorizedAddressing() {
+ return false;
}
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
+ unsigned VF);
+ unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
+ const SCEV *S);
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
-
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
- return 1;
- }
-
+ unsigned AddressSpace);
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
- return 1;
- }
-
+ Type *SubTp);
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
- bool VariableMask,
- unsigned Alignment) {
- return 1;
- }
-
+ bool VariableMask, unsigned Alignment);
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
- }
-
- unsigned getNumberOfParts(Type *Tp) {
- return BaseT::getNumberOfParts(Tp);
- }
-
- bool prefersVectorizedAddressing() {
- return true;
- }
-
+ unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
- const Instruction *I) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
- }
-
+ const Instruction *I);
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args);
- }
-
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- const Instruction *I = nullptr) {
- return 1;
- }
+ const Instruction *I = nullptr);
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ unsigned getCFInstrCost(unsigned Opcode) {
return 1;
}
More information about the llvm-commits
mailing list