[llvm] r181648 - SLPVectorizer: Add support for trees with external users.
Nadav Rotem
nrotem at apple.com
Fri May 10 15:59:33 PDT 2013
Author: nadav
Date: Fri May 10 17:59:33 2013
New Revision: 181648
URL: http://llvm.org/viewvc/llvm-project?rev=181648&view=rev
Log:
SLPVectorizer: Add support for trees with external users.
For example:
bar() {
int a = A[i];
int b = A[i+1];
B[i] = a;
B[i+1] = b;
foo(a); <--- 'a' is used outside the vectorized expression.
}
Modified:
llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=181648&r1=181647&r2=181648&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp Fri May 10 17:59:33 2013
@@ -243,6 +243,10 @@ int BoUpSLP::getTreeCost(ArrayRef<Value
LaneMap.clear();
MultiUserVals.clear();
MustScalarize.clear();
+ MustExtract.clear();
+
+ // Find the location of the last root.
+ unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())];
// Scan the tree and find which value is used by which lane, and which values
// must be scalarized.
@@ -258,15 +262,31 @@ int BoUpSLP::getTreeCost(ArrayRef<Value
for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
I != E; ++I) {
if (LaneMap.find(*I) == LaneMap.end()) {
- MustScalarize.insert(*it);
- DEBUG(dbgs()<<"SLP: Adding " << **it <<
- " to MustScalarize because of an out of tree usage.\n");
- break;
+ DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
+
+ // We don't have an ordering problem if the user is not in this basic
+ // block.
+ Instruction *Inst = cast<Instruction>(*I);
+ if (Inst->getParent() == BB) {
+ // We don't have an ordering problem if the user is after the last
+ // root.
+ unsigned Idx = InstrIdx[Inst];
+ if (Idx < LastRootIndex) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
+ "because of an unsafe out of tree usage.\n");
+ break;
+ }
+ }
+
+ DEBUG(dbgs()<<"SLP: Adding to MustExtract "
+ "because of a safe out of tree usage.\n");
+ MustExtract.insert(*it);
}
if (Lane == -1) Lane = LaneMap[*I];
if (Lane != LaneMap[*I]) {
MustScalarize.insert(*it);
- DEBUG(dbgs()<<"Adding " << **it <<
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
" to MustScalarize because multiple lane use it: "
<< Lane << " and " << LaneMap[*I] << ".\n");
break;
@@ -456,6 +476,13 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
}
}
+ // Calculate the extract cost.
+ unsigned ExternalUserExtractCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i)
+ if (MustExtract.count(VL[i]))
+ ExternalUserExtractCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+
switch (Opcode) {
case Instruction::ZExt:
case Instruction::SExt:
@@ -469,7 +496,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
ValueList Operands;
Type *SrcTy = VL0->getOperand(0)->getType();
// Prepare the operand vector.
@@ -510,7 +537,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- int Cost = 0;
+ int Cost = ExternalUserExtractCost;
// Calculate the cost of all of the operands.
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -540,7 +567,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- return VecLdCost - ScalarLdCost;
+ return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
@@ -556,7 +583,7 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
}
int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
- return TotalCost;
+ return TotalCost + ExternalUserExtractCost;
}
default:
// Unable to vectorize unknown instructions.
@@ -588,10 +615,24 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
Value *V = vectorizeTree_rec(VL, VF);
+
+ Instruction *LastInstr = GetLastInstr(VL, VL.size());
+ IRBuilder<> Builder(LastInstr);
+ for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
+ it != e; ++it) {
+ Instruction *I = cast<Instruction>(*it);
+ Value *Vec = VectorizedValues[I];
+ assert(LaneMap.count(I) && "Unable to find the lane for the external use");
+ Value *Idx = Builder.getInt32(LaneMap[I]);
+ Value *Extract = Builder.CreateExtractElement(Vec, Idx);
+ I->replaceAllUsesWith(Extract);
+ }
+
// We moved some instructions around. We have to number them again
// before we can do any analysis.
numberInstructions();
MustScalarize.clear();
+ MustExtract.clear();
return V;
}
Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=181648&r1=181647&r2=181648&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.h Fri May 10 17:59:33 2013
@@ -127,6 +127,11 @@ private:
/// NOTICE: The vectorization methods also use this set.
ValueSet MustScalarize;
+ /// Contains values that have users outside of the vectorized graph.
+ /// We need to generate extract instructions for these values.
+ /// NOTICE: The vectorization methods also use this set.
+ ValueSet MustExtract;
+
/// Contains a list of values that are used outside the current tree. This
/// set must be reset between runs.
ValueSet MultiUserVals;
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll?rev=181648&r1=181647&r2=181648&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/diamond.ll Fri May 10 17:59:33 2013
@@ -41,7 +41,7 @@ entry:
}
-; int foo_fail(int * restrict B, int * restrict A, int n, int m) {
+; int extr_user(int * restrict B, int * restrict A, int n, int m) {
; B[0] = n * A[0] + m * A[0];
; B[1] = n * A[1] + m * A[1];
; B[2] = n * A[2] + m * A[2];
@@ -49,10 +49,11 @@ entry:
; return A[0];
; }
-; CHECK: @foo_fail
-; CHECK-NOT: load <4 x i32>
+; CHECK: @extr_user
+; CHECK: store <4 x i32>
+; CHECK-NEXT: extractelement <4 x i32>
; CHECK: ret
-define i32 @foo_fail(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
entry:
%0 = load i32* %A, align 4
%mul238 = add i32 %m, %n
More information about the llvm-commits mailing list