[llvm] r334508 - [Hexagon] Make floating point operations expensive for vectorization
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 12 08:12:50 PDT 2018
Author: kparzysz
Date: Tue Jun 12 08:12:50 2018
New Revision: 334508
URL: http://llvm.org/viewvc/llvm-project?rev=334508&view=rev
Log:
[Hexagon] Make floating point operations expensive for vectorization
Added:
llvm/trunk/test/CodeGen/Hexagon/autohvx/float-cost.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h
Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp?rev=334508&r1=334507&r2=334508&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp Tue Jun 12 08:12:50 2018
@@ -35,6 +35,10 @@ static cl::opt<bool> EmitLookupTables("h
cl::init(true), cl::Hidden,
cl::desc("Control lookup table emission on Hexagon target"));
+// Constant "cost factor" to make floating point operations more expensive
+// in terms of vectorization cost. This is a crude heuristic, but it should
+// do for now. Ultimately, the cost should be expressed in cycles.
+static const unsigned FloatFactor = 4;
bool HexagonTTIImpl::useHVX() const {
return ST.useHVXOps() && HexagonAutoHVX;
@@ -54,6 +58,14 @@ bool HexagonTTIImpl::isTypeForHVX(Type *
return Action == TargetLoweringBase::TypeWidenVector;
}
+unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
+ if (Ty->isVectorTy())
+ return Ty->getVectorNumElements();
+ assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
+ "Expecting scalar type");
+ return 1;
+}
+
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
// Return fast hardware support as every input < 64 bits will be promoted
@@ -161,8 +173,8 @@ unsigned HexagonTTIImpl::getMemoryOpCost
// Non-HVX vectors.
// Add extra cost for floating point types.
- unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? 4 : 1;
-
+ unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
+ : 1;
Alignment = std::min(Alignment, 8u);
unsigned AlignWidth = 8 * std::max(1u, Alignment);
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
@@ -202,10 +214,9 @@ unsigned HexagonTTIImpl::getInterleavedM
unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy, const Instruction *I) {
if (ValTy->isVectorTy()) {
- auto *VecTy = dyn_cast<VectorType>(ValTy);
std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
if (Opcode == Instruction::FCmp)
- return LT.first + 4 * VecTy->getNumElements();
+ return LT.first + FloatFactor * getTypeNumElements(ValTy);
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
@@ -214,12 +225,25 @@ unsigned HexagonTTIImpl::getArithmeticIn
TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
+ if (Ty->isVectorTy()) {
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
+ if (LT.second.isFloatingPoint())
+ return LT.first + FloatFactor * getTypeNumElements(Ty);
+ }
return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo, Args);
}
-unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src, const Instruction *I) {
+unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
+ Type *SrcTy, const Instruction *I) {
+ if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
+ unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
+ unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
+
+ std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
+ std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
+ return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
+ }
return 1;
}
Modified: llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h?rev=334508&r1=334507&r2=334508&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonTargetTransformInfo.h Tue Jun 12 08:12:50 2018
@@ -46,6 +46,11 @@ class HexagonTTIImpl : public BasicTTIIm
bool useHVX() const;
bool isTypeForHVX(Type *VecTy) const;
+ // Returns the number of vector elements of Ty, if Ty is a vector type,
+ // or 1 if Ty is an integer or floating-point scalar. It is incorrect to
+ // call this function with any other type.
+ unsigned getTypeNumElements(Type *Ty) const;
+
public:
explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
Added: llvm/trunk/test/CodeGen/Hexagon/autohvx/float-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/autohvx/float-cost.ll?rev=334508&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/autohvx/float-cost.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/autohvx/float-cost.ll Tue Jun 12 08:12:50 2018
@@ -0,0 +1,91 @@
+; RUN: opt -march=hexagon -loop-vectorize -hexagon-autohvx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Check that the cost model makes vectorization non-profitable.
+; CHECK: LV: Vectorization is possible but not beneficial
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define void @f0(i8* nocapture readonly %a0, i8* nocapture %a1, i32 %a2, i32 %a3, i32 %a4, float %a5, float %a6) #0 {
+b0:
+ %v0 = icmp sgt i32 %a2, 0
+ br i1 %v0, label %b1, label %b2
+
+b1: ; preds = %b0
+ %v1 = add nsw i32 %a3, -1
+ %v2 = sitofp i32 %v1 to float
+ %v3 = fcmp olt float %v2, %a6
+ %v4 = select i1 %v3, float %v2, float %a6
+ %v5 = sitofp i32 %a4 to float
+ %v6 = fmul float %v4, %v5
+ %v7 = sitofp i32 %a2 to float
+ %v8 = fmul float %v6, %v7
+ %v9 = add nsw i32 %a4, -1
+ %v10 = sitofp i32 %v9 to float
+ %v11 = fcmp olt float %v10, %a5
+ %v12 = select i1 %v11, float %v10, float %a5
+ %v13 = fmul float %v12, %v7
+ %v14 = fadd float %v13, %v8
+ %v15 = fptosi float %v14 to i32
+ %v16 = fadd float %a5, 1.000000e+00
+ %v17 = fcmp ogt float %v16, %v10
+ %v18 = select i1 %v17, float %v10, float %v16
+ %v19 = fmul float %v18, %v7
+ %v20 = fadd float %v19, %v8
+ %v21 = fptosi float %v20 to i32
+ %v22 = fadd float %a6, 1.000000e+00
+ %v23 = fcmp ogt float %v22, %v2
+ %v24 = select i1 %v23, float %v2, float %v22
+ %v25 = fmul float %v24, %v5
+ %v26 = fmul float %v25, %v7
+ %v27 = fadd float %v13, %v26
+ %v28 = fptosi float %v27 to i32
+ %v29 = fadd float %v19, %v26
+ %v30 = fptosi float %v29 to i32
+ br label %b3
+
+b2: ; preds = %b3, %b0
+ ret void
+
+b3: ; preds = %b3, %b1
+ %v31 = phi i32 [ 0, %b1 ], [ %v60, %b3 ]
+ %v32 = add nsw i32 %v31, %v15
+ %v33 = getelementptr inbounds i8, i8* %a0, i32 %v32
+ %v34 = load i8, i8* %v33, align 1, !tbaa !0
+ %v35 = add nsw i32 %v31, %v21
+ %v36 = getelementptr inbounds i8, i8* %a0, i32 %v35
+ %v37 = load i8, i8* %v36, align 1, !tbaa !0
+ %v38 = add nsw i32 %v31, %v28
+ %v39 = getelementptr inbounds i8, i8* %a0, i32 %v38
+ %v40 = load i8, i8* %v39, align 1, !tbaa !0
+ %v41 = add nsw i32 %v31, %v30
+ %v42 = getelementptr inbounds i8, i8* %a0, i32 %v41
+ %v43 = load i8, i8* %v42, align 1, !tbaa !0
+ %v44 = uitofp i8 %v34 to float
+ %v45 = uitofp i8 %v37 to float
+ %v46 = uitofp i8 %v40 to float
+ %v47 = uitofp i8 %v43 to float
+ %v48 = fsub float %v45, %v44
+ %v49 = fmul float %v48, 0x3FD99999A0000000
+ %v50 = fadd float %v49, %v44
+ %v51 = fsub float %v47, %v46
+ %v52 = fmul float %v51, 0x3FD99999A0000000
+ %v53 = fadd float %v52, %v46
+ %v54 = fsub float %v53, %v50
+ %v55 = fmul float %v54, 0x3FD99999A0000000
+ %v56 = fadd float %v50, %v55
+ %v57 = fadd float %v56, 5.000000e-01
+ %v58 = fptoui float %v57 to i8
+ %v59 = getelementptr inbounds i8, i8* %a1, i32 %v31
+ store i8 %v58, i8* %v59, align 1, !tbaa !0
+ %v60 = add nuw nsw i32 %v31, 1
+ %v61 = icmp eq i32 %v60, %a2
+ br i1 %v61, label %b2, label %b3
+}
+
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv65" "target-features"="+hvx-length128b,+hvxv65" }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"omnipotent char", !2, i64 0}
+!2 = !{!"Simple C/C++ TBAA"}
More information about the llvm-commits
mailing list