[PATCH] Cost model support for lowered math builtins.
Benjamin Kramer
benny.kra at gmail.com
Thu Feb 28 10:32:08 PST 2013
bkramer added you to the CC list for the revision "Cost model support for lowered math builtins.".
Hi nadav, paul.redmond, rengolin,
This patch allows us to compile a function like
void foo(float *f) {
for (unsigned i = 0; i != 1024; ++i)
f[i] = floorf(f[i]);
}
into roundps if SSE4.1 is available and not vectorize it otherwise.
http://llvm-reviews.chandlerc.com/D466
Files:
lib/CodeGen/BasicTargetTransformInfo.cpp
test/Transforms/LoopVectorize/X86/intrinsic-cost.ll
Index: lib/CodeGen/BasicTargetTransformInfo.cpp
===================================================================
--- lib/CodeGen/BasicTargetTransformInfo.cpp
+++ lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -117,7 +117,6 @@
return new BasicTTI(TLI);
}
-
bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
return TLI->isLegalAddImmediate(imm);
}
@@ -379,22 +378,77 @@
return LT.first;
}
-unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys) const {
- // assume that we need to scalarize this intrinsic.
- unsigned ScalarizationCost = 0;
- unsigned ScalarCalls = 1;
- if (RetTy->isVectorTy()) {
- ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
- ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
- }
- for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
- if (Tys[i]->isVectorTy()) {
- ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ unsigned ISD = 0;
+ switch (IID) {
+ default: {
+ // Assume that we need to scalarize this intrinsic.
+ unsigned ScalarizationCost = 0;
+ unsigned ScalarCalls = 1;
+ if (RetTy->isVectorTy()) {
+ ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
}
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+ if (Tys[i]->isVectorTy()) {
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+ ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+ }
+ }
+
+ return ScalarCalls + ScalarizationCost;
+ }
+ // Look for intrinsics that can be lowered directly or turned into a scalar
+ // intrinsic call.
+ case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
+ case Intrinsic::sin: ISD = ISD::FSIN; break;
+ case Intrinsic::cos: ISD = ISD::FCOS; break;
+ case Intrinsic::exp: ISD = ISD::FEXP; break;
+ case Intrinsic::exp2: ISD = ISD::FEXP2; break;
+ case Intrinsic::log: ISD = ISD::FLOG; break;
+ case Intrinsic::log10: ISD = ISD::FLOG10; break;
+ case Intrinsic::log2: ISD = ISD::FLOG2; break;
+ case Intrinsic::fabs: ISD = ISD::FABS; break;
+ case Intrinsic::floor: ISD = ISD::FFLOOR; break;
+ case Intrinsic::ceil: ISD = ISD::FCEIL; break;
+ case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
+ case Intrinsic::rint: ISD = ISD::FRINT; break;
+ case Intrinsic::pow: ISD = ISD::FPOW; break;
+ case Intrinsic::fma: ISD = ISD::FMA; break;
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
}
- return ScalarCalls + ScalarizationCost;
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
+
+ if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+ // The operation is legal. Assume it costs 1.
+ // If the type is split to multiple registers, assume that thre is some
+ // overhead to this.
+ // TODO: Once we have extract/insert subvector cost we need to use them.
+ if (LT.first > 1)
+ return LT.first * 2;
+ return LT.first * 1;
+ }
+
+ if (!TLI->isOperationExpand(ISD, LT.second)) {
+ // If the operation is custom lowered then assume
+ // thare the code is twice as expensive.
+ return LT.first * 2;
+ }
+
+ // Else, assume that we need to scalarize this intrinsic. For math builtins
+ // this will emit a costly libcall, adding call overhead and spills. Make it
+ // very expensive.
+ if (RetTy->isVectorTy()) {
+ unsigned Num = RetTy->getVectorNumElements();
+ unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
+ Tys);
+ return 10 * Cost * Num;
+ }
+
+ // This is going to be turned into a library call, make it expensive.
+ return 10;
}
unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
Index: test/Transforms/LoopVectorize/X86/intrinsic-cost.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/X86/intrinsic-cost.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize < %s | FileCheck %s -check-prefix=NO
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=corei7 -loop-vectorize < %s | FileCheck %s -check-prefix=YES
+
+define void @test1(float* nocapture %x) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4
+ %call = tail call float @ceilf(float %0) nounwind readnone
+ store float %call, float* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; NO: @test1
+; NO-NOT: llvm.ceilf
+; NO: ret void
+
+; YES: @test1
+; YES: llvm.ceil.v4f32
+; YES: ret void
+
+}
+
+declare float @ceilf(float) nounwind readnone
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D466.2.patch
Type: text/x-patch
Size: 5348 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130228/72d452d5/attachment.bin>
More information about the llvm-commits
mailing list