[llvm] r179475 - SLPVectorizer: Add support for trees that don't start at binary operators, and add the cost of extracting values from the roots of the tree.

Nadav Rotem nrotem at apple.com
Sat Apr 13 22:15:54 PDT 2013


Author: nadav
Date: Sun Apr 14 00:15:53 2013
New Revision: 179475

URL: http://llvm.org/viewvc/llvm-project?rev=179475&view=rev
Log:
SLPVectorizer: Add support for trees that don't start at binary operators, and add the cost of extracting values from the roots of the tree.


Added:
    llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
    llvm/trunk/lib/Transforms/Vectorize/VecUtils.h

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=179475&r1=179474&r2=179475&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sun Apr 14 00:15:53 2013
@@ -85,14 +85,16 @@ struct SLPVectorizer : public BasicBlock
     return true;
   }
 
-  bool tryToVectorizePair(BinaryOperator *A, BinaryOperator *B,  BoUpSLP &R) {
+  bool tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R) {
     if (!A || !B) return false;
     BoUpSLP::ValueList VL;
     VL.push_back(A);
     VL.push_back(B);
     int Cost = R.getTreeCost(VL);
-    DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << ".\n");
-    if (Cost >= -SLPCostThreshold) return false;
+    int ExtrCost = R.getScalarizationCost(VL);
+    DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+                  " Cost of extract:" << ExtrCost << ".\n");
+    if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
     DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
     R.vectorizeArith(VL);
     return true;
@@ -100,11 +102,12 @@ struct SLPVectorizer : public BasicBlock
 
   bool tryToVectorizeCandidate(BinaryOperator *V,  BoUpSLP &R) {
     if (!V) return false;
-    BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
-    BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
     // Try to vectorize V.
-    if (tryToVectorizePair(A, B, R)) return true;
+    if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+      return true;
 
+    BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+    BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
     // Try to skip B.
     if (B && B->hasOneUse()) {
       BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));

Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=179475&r1=179474&r2=179475&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp Sun Apr 14 00:15:53 2013
@@ -173,6 +173,16 @@ bool BoUpSLP::vectorizeStores(StoreList
   return Changed;
 }
 
+int BoUpSLP::getScalarizationCost(ValueList &VL) {
+  Type *ScalarTy = VL[0]->getType(); // VL[0] is read unchecked: VL must be non-empty.
+  // If the first value is a store, use the type of the value being stored.
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+  // One vector lane per value in VL; the Type overload computes the cost.
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  return getScalarizationCost(VecTy);
+}
+
 int BoUpSLP::getScalarizationCost(Type *Ty) {
   int Cost = 0;
   for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)

Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=179475&r1=179474&r2=179475&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.h Sun Apr 14 00:15:53 2013
@@ -61,6 +61,11 @@ struct BoUpSLP  {
   /// A negative number means that this is profitable.
   int getTreeCost(ValueList &VL);
 
+  /// \returns the scalarization cost for this ValueList. Assuming that this
+  /// subtree gets vectorized, we may need to extract the values from the
+  /// roots. This method calculates the cost of extracting the values.
+  int getScalarizationCost(ValueList &VL);
+
   /// \brief Attempts to order and vectorize a sequence of stores. This
   /// function does a quadratic scan of the given stores.
   /// \returns true if the basic block was modified.
@@ -118,7 +123,7 @@ private:
   /// by multiple lanes, or by users outside the tree.
   /// NOTICE: The vectorization methods also use this set.
   ValueSet MustScalarize;
-  
+
   // Contains a list of values that are used outside the current tree. This
   // set must be reset between runs.
   ValueSet MultiUserVals;

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll?rev=179475&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll Sun Apr 14 00:15:53 2013
@@ -0,0 +1,37 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: load <2 x double>
+;CHECK: ret
+define double @foo(double* nocapture %D) #0 {
+  br label %1
+; Loop: sum += D[2*i]*D[2*i] + D[2*i+1]*D[2*i+1], exiting once i+1 == 100.
+; <label>:1                                       ; preds = %1, %0
+  %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
+  %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
+  %2 = shl nsw i32 %i.02, 1
+  %3 = getelementptr inbounds double* %D, i32 %2
+  %4 = load double* %3, align 4, !tbaa !0
+  %A4 = fmul double %4, %4
+  %5 = or i32 %2, 1
+  %6 = getelementptr inbounds double* %D, i32 %5
+  %7 = load double* %6, align 4, !tbaa !0
+  %A7 = fmul double %7, %7
+  %8 = fadd double %A4, %A7
+  %9 = fadd double %sum.01, %8
+  %10 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %10, 100
+  br i1 %exitcond, label %11, label %1
+; Exit block: return the accumulated sum.
+; <label>:11                                      ; preds = %1
+  ret double %9
+}
+
+attributes #0 = { nounwind readonly ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}





More information about the llvm-commits mailing list