[llvm] r179475 - SLPVectorizer: Add support for trees that don't start at binary operators, and add the cost of extracting values from the roots of the tree.
Nadav Rotem
nrotem at apple.com
Sat Apr 13 22:15:54 PDT 2013
Author: nadav
Date: Sun Apr 14 00:15:53 2013
New Revision: 179475
URL: http://llvm.org/viewvc/llvm-project?rev=179475&view=rev
Log:
SLPVectorizer: Add support for trees that don't start at binary operators, and add the cost of extracting values from the roots of the tree.
Added:
llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=179475&r1=179474&r2=179475&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sun Apr 14 00:15:53 2013
@@ -85,14 +85,16 @@ struct SLPVectorizer : public BasicBlock
return true;
}
- bool tryToVectorizePair(BinaryOperator *A, BinaryOperator *B, BoUpSLP &R) {
+ bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B) return false;
BoUpSLP::ValueList VL;
VL.push_back(A);
VL.push_back(B);
int Cost = R.getTreeCost(VL);
- DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << ".\n");
- if (Cost >= -SLPCostThreshold) return false;
+ int ExtrCost = R.getScalarizationCost(VL);
+ DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+ " Cost of extract:" << ExtrCost << ".\n");
+ if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
R.vectorizeArith(VL);
return true;
@@ -100,11 +102,12 @@ struct SLPVectorizer : public BasicBlock
bool tryToVectorizeCandidate(BinaryOperator *V, BoUpSLP &R) {
if (!V) return false;
- BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
- BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
// Try to vectorize V.
- if (tryToVectorizePair(A, B, R)) return true;
+ if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ return true;
+ BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+ BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
// Try to skip B.
if (B && B->hasOneUse()) {
BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=179475&r1=179474&r2=179475&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp Sun Apr 14 00:15:53 2013
@@ -173,6 +173,16 @@ bool BoUpSLP::vectorizeStores(StoreList
return Changed;
}
+int BoUpSLP::getScalarizationCost(ValueList &VL) {
+ Type *ScalarTy = VL[0]->getType();
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ return getScalarizationCost(VecTy);
+}
+
int BoUpSLP::getScalarizationCost(Type *Ty) {
int Cost = 0;
for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=179475&r1=179474&r2=179475&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.h Sun Apr 14 00:15:53 2013
@@ -61,6 +61,11 @@ struct BoUpSLP {
/// A negative number means that this is profitable.
int getTreeCost(ValueList &VL);
+ /// \returns the scalarization cost for this ValueList. Assuming that this
+ /// subtree gets vectorized, we may need to extract the values from the
+ /// roots. This method calculates the cost of extracting the values.
+ int getScalarizationCost(ValueList &VL);
+
/// \brief Attempts to order and vectorize a sequence of stores. This
/// function does a quadratic scan of the given stores.
/// \returns true if the basic block was modified.
@@ -118,7 +123,7 @@ private:
/// by multiple lanes, or by users outside the tree.
/// NOTICE: The vectorization methods also use this set.
ValueSet MustScalarize;
-
+
// Contains a list of values that are used outside the current tree. This
// set must be reset between runs.
ValueSet MultiUserVals;
Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll?rev=179475&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/reduction2.ll Sun Apr 14 00:15:53 2013
@@ -0,0 +1,37 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: load <2 x double>
+;CHECK: ret
+define double @foo(double* nocapture %D) #0 {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
+ %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
+ %2 = shl nsw i32 %i.02, 1
+ %3 = getelementptr inbounds double* %D, i32 %2
+ %4 = load double* %3, align 4, !tbaa !0
+ %A4 = fmul double %4, %4
+ %5 = or i32 %2, 1
+ %6 = getelementptr inbounds double* %D, i32 %5
+ %7 = load double* %6, align 4, !tbaa !0
+ %A7 = fmul double %7, %7
+ %8 = fadd double %A4, %A7
+ %9 = fadd double %sum.01, %8
+ %10 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %10, 100
+ br i1 %exitcond, label %11, label %1
+
+; <label>:11 ; preds = %1
+ ret double %9
+}
+
+attributes #0 = { nounwind readonly ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
More information about the llvm-commits mailing list