[llvm] r328639 - [Hexagon] Rudimentary support for auto-vectorization for HVX
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 27 10:07:52 PDT 2018
Author: kparzysz
Date: Tue Mar 27 10:07:52 2018
New Revision: 328639
URL: http://llvm.org/viewvc/llvm-project?rev=328639&view=rev
Log:
[Hexagon] Rudimentary support for auto-vectorization for HVX
This implements a set of TTI functions that the loop vectorizer queries.
The only purpose of this change is to enable testing: auto-vectorization
remains disabled by default and is enabled with -hexagon-autohvx.
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp?rev=328639&r1=328638&r2=328639&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp Tue Mar 27 10:07:52 2018
@@ -26,6 +26,9 @@ using namespace llvm;
#define DEBUG_TYPE "hexagontti"
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), // Hidden flag, off by default.
+ cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); // Read by getNumberOfRegisters(true) and getMaxInterleaveFactor.
+
static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
cl::init(true), cl::Hidden,
cl::desc("Control lookup table emission on Hexagon target"));
@@ -47,8 +50,41 @@ bool HexagonTTIImpl::shouldFavorPostInc(
return true;
}
-unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
- return vector ? 0 : 32;
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const { // Register count for the vector (Vector == true) or scalar query.
+ if (Vector)
+ return HexagonAutoHVX && getST()->useHVXOps() ? 32 : 0; // 32 only when -hexagon-autohvx is set AND the subtarget reports HVX ops; otherwise 0.
+ return 32; // Scalar query: always 32.
+}
+
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) { // VF is not consulted.
+ return HexagonAutoHVX && getST()->useHVXOps() ? 64 : 0; // 64 under the same gate as getNumberOfRegisters(true), else 0. NOTE(review): a factor of 0 (rather than 1) for the disabled case looks unusual -- confirm how callers treat zero.
+}
+
+unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const { // Register width in bits for the vector or scalar query.
+ return Vector ? getMinVectorRegisterBitWidth() : 32; // Vector width is delegated (and, unlike getNumberOfRegisters, not gated on -hexagon-autohvx); scalar width is 32.
+}
+
+unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const { // Also supplies the vector answer of getRegisterBitWidth.
+ return getST()->useHVXOps() ? getST()->getVectorLength()*8 : 0; // getVectorLength() scaled by 8 (presumably bytes -> bits; confirm); 0 when HVX ops are not in use.
+}
+
+unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace, const Instruction *I) { // Custom cost only for vector loads wider than 64 bits; every other query defers to BaseT.
+ if (Opcode == Instruction::Load && Src->isVectorTy()) { // Stores and non-vector types skip straight to the BaseT fallback.
+ VectorType *VecTy = cast<VectorType>(Src);
+ unsigned VecWidth = VecTy->getBitWidth();
+ if (VecWidth > 64) {
+ // Assume that vectors longer than 64 bits are meant for HVX.
+ if (getNumberOfRegisters(true) > 0) { // True only with -hexagon-autohvx and HVX ops, so getRegisterBitWidth(true) takes its HVX branch (assumes getVectorLength() is non-zero).
+ if (VecWidth % getRegisterBitWidth(true) == 0)
+ return 1; // Whole multiple of the vector register width: flat cost 1, however many registers that spans.
+ }
+ unsigned AlignWidth = 8 * std::max(1u, Alignment); // Alignment scaled by 8 (presumably bytes -> bits); an Alignment of 0 is treated as 1.
+ unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; // Presumably the number of AlignWidth-sized pieces covering VecWidth, rounded up.
+ return 3*NumLoads; // Each piece is charged 3.
+ }
+ }
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); // Fallback: stores, non-vector types, and vectors of 64 bits or fewer.
}
unsigned HexagonTTIImpl::getPrefetchDistance() const {
@@ -61,21 +97,22 @@ unsigned HexagonTTIImpl::getCacheLineSiz
int HexagonTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands) {
- auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
+ auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
+ // Only extensions from an integer type shorter than 32-bit to i32
+ // can be folded into the load.
+ const DataLayout &DL = getDataLayout();
+ unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
+ unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
+ if (DBW != 32 || SBW >= DBW)
+ return false;
+
const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
// Technically, this code could allow multiple uses of the load, and
// check if all the uses are the same extension operation, but this
// should be sufficient for most cases.
- if (!LI || !LI->hasOneUse())
- return false;
-
- // Only extensions from an integer type shorter than 32-bit to i32
- // can be folded into the load.
- unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
- unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
- return DBW == 32 && (SBW < DBW);
+ return LI && LI->hasOneUse();
};
if (const CastInst *CI = dyn_cast<const CastInst>(U))
Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h?rev=328639&r1=328638&r2=328639&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h Tue Mar 27 10:07:52 2018
@@ -70,6 +70,113 @@ public:
/// @{
unsigned getNumberOfRegisters(bool vector) const;
+ unsigned getMaxInterleaveFactor(unsigned VF); // Defined out of line in HexagonTargetTransformInfo.cpp; non-const, unlike the two below.
+ unsigned getRegisterBitWidth(bool Vector) const; // Defined out of line in HexagonTargetTransformInfo.cpp.
+ unsigned getMinVectorRegisterBitWidth() const; // Defined out of line in HexagonTargetTransformInfo.cpp.
+
+ bool supportsEfficientVectorElementLoadStore() { // Fixed answer; no subtarget state is consulted.
+ return false;
+ }
+
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { // All three arguments are ignored.
+ return 0; // Scalarization is reported as free.
+ }
+
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
+ unsigned VF) { // Args and VF are ignored.
+ return 0; // Operand scalarization is reported as free.
+ }
+
+ unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys) { // Callee and types are ignored.
+ return 1; // Flat cost for every call.
+ }
+
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) { // Value-argument overload.
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); // Pure pass-through to BaseT; no Hexagon-specific adjustment.
+ }
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX) { // Type-argument overload; unlike the Value-argument overload it does not defer to BaseT.
+ return 1; // Flat cost; ID, types, FMF and ScalarizationCostPassed are all ignored.
+ }
+
+ bool hasBranchDivergence() { // Fixed answer; no subtarget state is consulted.
+ return false;
+ }
+
+ bool enableAggressiveInterleaving(bool LoopHasReductions) { // LoopHasReductions is ignored.
+ return false;
+ }
+
+ unsigned getCFInstrCost(unsigned Opcode) { // Opcode is ignored.
+ return 1; // Flat cost for every control-flow instruction.
+ }
+
+ unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
+ const SCEV *) { // The ScalarEvolution and SCEV parameters are unnamed and unused; Tp is ignored as well.
+ return 0; // Address computation is reported as free.
+ }
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace, const Instruction *I = nullptr); // Defined out of line in HexagonTargetTransformInfo.cpp.
+
+ unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) { // All arguments are ignored.
+ return 1; // Flat cost for every masked load/store query.
+ }
+
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) { // Shuffle kind, types and index are ignored.
+ return 1; // Flat cost for every shuffle.
+ }
+
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ bool VariableMask,
+ unsigned Alignment) { // All arguments are ignored.
+ return 1; // Flat cost for every gather/scatter query.
+ }
+
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) { // Explicit override that adds no Hexagon-specific logic.
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, // Pure pass-through to BaseT with the arguments unchanged.
+ Alignment, AddressSpace);
+ }
+
+ unsigned getNumberOfParts(Type *Tp) { // Explicit override that adds no Hexagon-specific logic.
+ return BaseT::getNumberOfParts(Tp); // Pure pass-through to BaseT.
+ }
+
+ bool prefersVectorizedAddressing() { // Fixed answer; no subtarget state is consulted.
+ return true;
+ }
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) { // Explicit override that adds no Hexagon-specific logic.
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); // Pure pass-through to BaseT.
+ }
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>()) { // Explicit override that adds no Hexagon-specific logic.
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, // Pure pass-through to BaseT with the arguments unchanged.
+ Opd1PropInfo, Opd2PropInfo, Args);
+ }
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr) { // Opcode, types and instruction are ignored.
+ return 1; // Flat cost for every cast.
+ }
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // All arguments are ignored.
+ return 1; // Flat cost for every vector-element instruction query.
+ }
/// @}
@@ -80,5 +187,4 @@ public:
};
} // end namespace llvm
-
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
More information about the llvm-commits
mailing list