[llvm-commits] [llvm] r171079 - in /llvm/trunk: lib/Transforms/Vectorize/BBVectorize.cpp test/Transforms/BBVectorize/X86/simple-int.ll

Hal Finkel hfinkel at anl.gov
Tue Dec 25 17:36:57 PST 2012


Author: hfinkel
Date: Tue Dec 25 19:36:57 2012
New Revision: 171079

URL: http://llvm.org/viewvc/llvm-project?rev=171079&view=rev
Log:
BBVectorize: Use VTTI to compute costs for intrinsics vectorization

For the time being this includes only some dummy test cases. Once the
generic implementation of the intrinsics cost function does something other
than assuming scalarization in all cases, or some target specializes the
interface, some real test cases can be added.

Also, for consistency, I changed the type of IID from unsigned to Intrinsic::ID
in a few other places.

Added:
    llvm/trunk/test/Transforms/BBVectorize/X86/simple-int.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp

Modified: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=171079&r1=171078&r2=171079&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Tue Dec 25 19:36:57 2012
@@ -642,7 +642,7 @@
       Function *F = I->getCalledFunction();
       if (!F) return false;
 
-      unsigned IID = F->getIntrinsicID();
+      Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
       if (!IID) return false;
 
       switch(IID) {
@@ -1020,14 +1020,67 @@
     // vectorized, the second arguments must be equal.
     CallInst *CI = dyn_cast<CallInst>(I);
     Function *FI;
-    if (CI && (FI = CI->getCalledFunction()) &&
-        FI->getIntrinsicID() == Intrinsic::powi) {
+    if (CI && (FI = CI->getCalledFunction())) {
+      Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
+      if (IID == Intrinsic::powi) {
+        Value *A1I = CI->getArgOperand(1),
+              *A1J = cast<CallInst>(J)->getArgOperand(1);
+        const SCEV *A1ISCEV = SE->getSCEV(A1I),
+                   *A1JSCEV = SE->getSCEV(A1J);
+        return (A1ISCEV == A1JSCEV);
+      }
+
+      if (IID && VTTI) {
+        SmallVector<Type*, 4> Tys;
+        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+          Tys.push_back(CI->getArgOperand(i)->getType());
+        unsigned ICost = VTTI->getIntrinsicInstrCost(IID, IT1, Tys);
+
+        Tys.clear();
+        CallInst *CJ = cast<CallInst>(J);
+        for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
+          Tys.push_back(CJ->getArgOperand(i)->getType());
+        unsigned JCost = VTTI->getIntrinsicInstrCost(IID, JT1, Tys);
+
+        Tys.clear();
+        assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
+               "Intrinsic argument counts differ");
+        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+          if (IID == Intrinsic::powi && i == 1)
+            Tys.push_back(CI->getArgOperand(i)->getType());
+          else
+            Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
+                                            CJ->getArgOperand(i)->getType()));
+        }
+
+        Type *RetTy = getVecTypeForPair(IT1, JT1);
+        unsigned VCost = VTTI->getIntrinsicInstrCost(IID, RetTy, Tys);
+
+        if (VCost > ICost + JCost)
+          return false;
+
+        // We don't want to fuse to a type that will be split, even
+        // if the two input types will also be split and there is no other
+        // associated cost.
+        unsigned RetParts = VTTI->getNumberOfParts(RetTy);
+        if (RetParts > 1)
+          return false;
+        else if (!RetParts && VCost == ICost + JCost)
+          return false;
+
+        for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+          if (!Tys[i]->isVectorTy())
+            continue;
 
-      Value *A1I = CI->getArgOperand(1),
-            *A1J = cast<CallInst>(J)->getArgOperand(1);
-      const SCEV *A1ISCEV = SE->getSCEV(A1I),
-                 *A1JSCEV = SE->getSCEV(A1J);
-      return (A1ISCEV == A1JSCEV);
+          unsigned NumParts = VTTI->getNumberOfParts(Tys[i]);
+          if (NumParts > 1)
+            return false;
+          else if (!NumParts && VCost == ICost + JCost)
+            return false;
+        }
+
+        CostSavings = ICost + JCost - VCost;
+      }
     }
 
     return true;
@@ -2551,7 +2604,7 @@
         continue;
       } else if (isa<CallInst>(I)) {
         Function *F = cast<CallInst>(I)->getCalledFunction();
-        unsigned IID = F->getIntrinsicID();
+        Intrinsic::ID IID = (Intrinsic::ID) F->getIntrinsicID();
         if (o == NumOperands-1) {
           BasicBlock &BB = *I->getParent();
 
@@ -2560,8 +2613,7 @@
           Type *ArgTypeJ = J->getType();
           Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
 
-          ReplacedOperands[o] = Intrinsic::getDeclaration(M,
-            (Intrinsic::ID) IID, VArgType);
+          ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
           continue;
         } else if (IID == Intrinsic::powi && o == 1) {
           // The second argument of powi is a single integer and we've already

Added: llvm/trunk/test/Transforms/BBVectorize/X86/simple-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/BBVectorize/X86/simple-int.ll?rev=171079&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/BBVectorize/X86/simple-int.ll (added)
+++ llvm/trunk/test/Transforms/BBVectorize/X86/simple-int.ll Tue Dec 25 19:36:57 2012
@@ -0,0 +1,79 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with fmuladd
+define double @test1a(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.fmuladd.f64(double %X1, double %A1, double %C1)
+	%Y2 = call double @llvm.fmuladd.f64(double %X2, double %A2, double %C2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test1a
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.cos.f64(double %X1)
+	%Y2 = call double @llvm.cos.f64(double %X2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test3
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+	%X1 = fsub double %A1, %B1
+	%X2 = fsub double %A2, %B2
+        %P2 = add i32 %P, 1
+	%Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+	%Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+	%Z1 = fadd double %Y1, %B1
+	%Z2 = fadd double %Y2, %B2
+	%R  = fmul double %Z1, %Z2
+	ret double %R
+; CHECK: @test4
+; CHECK: ret double %R
+}
+





More information about the llvm-commits mailing list