[PATCH] Cost model support for lowered math builtins.
Arnold Schwaighofer
aschwaighofer at apple.com
Thu Feb 28 10:46:53 PST 2013
LGTM,
Can you write shorter tests using the cost model analysis pass?
opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
See for example: test/Analysis/CostModel/X86/arith.ll
Thanks,
Arnold
On Feb 28, 2013, at 12:32 PM, Benjamin Kramer <benny.kra at gmail.com> wrote:
> bkramer added you to the CC list for the revision "Cost model support for lowered math builtins.".
>
> Hi nadav, paul.redmond, rengolin,
>
> This patch allows us to compile a function like
>
> void foo(float *f) {
> for (unsigned i = 0; i != 1024; ++i)
> f[i] = floorf(f[i]);
> }
>
> into roundps if SSE4.1 is available, and to avoid vectorizing it otherwise.
>
> http://llvm-reviews.chandlerc.com/D466
>
> Files:
> lib/CodeGen/BasicTargetTransformInfo.cpp
> test/Transforms/LoopVectorize/X86/intrinsic-cost.ll
>
> Index: lib/CodeGen/BasicTargetTransformInfo.cpp
> ===================================================================
> --- lib/CodeGen/BasicTargetTransformInfo.cpp
> +++ lib/CodeGen/BasicTargetTransformInfo.cpp
> @@ -117,7 +117,6 @@
> return new BasicTTI(TLI);
> }
>
> -
> bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
> return TLI->isLegalAddImmediate(imm);
> }
> @@ -379,22 +378,77 @@
> return LT.first;
> }
>
> -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
> +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
> ArrayRef<Type *> Tys) const {
> - // assume that we need to scalarize this intrinsic.
> - unsigned ScalarizationCost = 0;
> - unsigned ScalarCalls = 1;
> - if (RetTy->isVectorTy()) {
> - ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
> - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
> - }
> - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
> - if (Tys[i]->isVectorTy()) {
> - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
> + unsigned ISD = 0;
> + switch (IID) {
> + default: {
> + // Assume that we need to scalarize this intrinsic.
> + unsigned ScalarizationCost = 0;
> + unsigned ScalarCalls = 1;
> + if (RetTy->isVectorTy()) {
> + ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
> ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
> }
> + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
> + if (Tys[i]->isVectorTy()) {
> + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
> + ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
> + }
> + }
> +
> + return ScalarCalls + ScalarizationCost;
> + }
> + // Look for intrinsics that can be lowered directly or turned into a scalar
> + // intrinsic call.
> + case Intrinsic::sqrt: ISD = ISD::FSQRT; break;
> + case Intrinsic::sin: ISD = ISD::FSIN; break;
> + case Intrinsic::cos: ISD = ISD::FCOS; break;
> + case Intrinsic::exp: ISD = ISD::FEXP; break;
> + case Intrinsic::exp2: ISD = ISD::FEXP2; break;
> + case Intrinsic::log: ISD = ISD::FLOG; break;
> + case Intrinsic::log10: ISD = ISD::FLOG10; break;
> + case Intrinsic::log2: ISD = ISD::FLOG2; break;
> + case Intrinsic::fabs: ISD = ISD::FABS; break;
> + case Intrinsic::floor: ISD = ISD::FFLOOR; break;
> + case Intrinsic::ceil: ISD = ISD::FCEIL; break;
> + case Intrinsic::trunc: ISD = ISD::FTRUNC; break;
> + case Intrinsic::rint: ISD = ISD::FRINT; break;
> + case Intrinsic::pow: ISD = ISD::FPOW; break;
> + case Intrinsic::fma: ISD = ISD::FMA; break;
> + case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
> }
> - return ScalarCalls + ScalarizationCost;
> +
> + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);
> +
> + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
> + // The operation is legal. Assume it costs 1.
> + // If the type is split to multiple registers, assume that there is some
> + // overhead to this.
> + // TODO: Once we have extract/insert subvector cost we need to use them.
> + if (LT.first > 1)
> + return LT.first * 2;
> + return LT.first * 1;
> + }
> +
> + if (!TLI->isOperationExpand(ISD, LT.second)) {
> + // If the operation is custom lowered then assume
> + // that the code is twice as expensive.
> + return LT.first * 2;
> + }
> +
> + // Else, assume that we need to scalarize this intrinsic. For math builtins
> + // this will emit a costly libcall, adding call overhead and spills. Make it
> + // very expensive.
> + if (RetTy->isVectorTy()) {
> + unsigned Num = RetTy->getVectorNumElements();
> + unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(),
> + Tys);
> + return 10 * Cost * Num;
> + }
> +
> + // This is going to be turned into a library call, make it expensive.
> + return 10;
> }
>
> unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
> Index: test/Transforms/LoopVectorize/X86/intrinsic-cost.ll
> ===================================================================
> --- /dev/null
> +++ test/Transforms/LoopVectorize/X86/intrinsic-cost.ll
> @@ -0,0 +1,32 @@
> +; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize < %s | FileCheck %s -check-prefix=NO
> +; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=corei7 -loop-vectorize < %s | FileCheck %s -check-prefix=YES
> +
> +define void @test1(float* nocapture %x) nounwind {
> +entry:
> + br label %for.body
> +
> +for.body: ; preds = %entry, %for.body
> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
> + %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
> + %0 = load float* %arrayidx, align 4
> + %call = tail call float @ceilf(float %0) nounwind readnone
> + store float %call, float* %arrayidx, align 4
> + %indvars.iv.next = add i64 %indvars.iv, 1
> + %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> + %exitcond = icmp eq i32 %lftr.wideiv, 1024
> + br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end: ; preds = %for.body
> + ret void
> +
> +; NO: @test1
> +; NO-NOT: llvm.ceilf
> +; NO: ret void
> +
> +; YES: @test1
> +; YES: llvm.ceil.v4f32
> +; YES: ret void
> +
> +}
> +
> +declare float @ceilf(float) nounwind readnone
> <D466.2.patch>_______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list