[PATCH] Extend SLPVectorizer for cases where insertelement instructions must be rescheduled
Arch D. Robison
arch.robison at intel.com
Fri Mar 21 14:05:37 PDT 2014
Hi #llvm,
The patch extends SLPVectorizer to handle cases where operations building a vector overlap the operations to be vectorized. Motivation arises from compiling Julia tuples, as described [here](https://github.com/JuliaLang/julia/issues/5857#issuecomment-35784676), though the extension is likely useful for other languages too.
Per a suggestion of Arnold Schwaighofer, the patch adapts existing logic for `RdxOps`. The patch changes method `buildTree` to separate two roles of `RdxOps`.
* The first role is as a list of uses that can be ignored for purposes of legality checking.
* The second role is as a list of reduction uses.
The `insertelement` instructions for building a vector play the first role, but not the second. Values used for reductions play both roles.
A new method `movePrematureInserts` reschedules the `insertelement` instructions when the vectorization transform is actually applied.
The patch refines a cost estimate to take credit for `extractelement` instructions that will be erased.
The new test checks that vectorization occurs for a case (derived from a Julia example) that previously stumped SLPVectorizer.
http://llvm-reviews.chandlerc.com/D3143
Files:
lib/Transforms/Vectorize/SLPVectorizer.cpp
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -367,7 +368,7 @@
/// Construct a vectorizable tree that starts at \p Roots and is possibly
/// used by a reduction of \p RdxOps.
- void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+ void buildTree(ArrayRef<Value *> Roots, bool buildsVector=false, ValueSet *RdxOps = 0);
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
@@ -391,7 +392,7 @@
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, bool buildsVector=false);
/// Vectorize a single entry in the tree.
Value *vectorizeTree(TreeEntry *E);
@@ -542,12 +543,12 @@
IRBuilder<> Builder;
};
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots, bool buildsVector, ValueSet *Rdx) {
deleteTree();
RdxOps = Rdx;
if (!getSameType(Roots))
return;
- buildTree_rec(Roots, 0);
+ buildTree_rec(Roots, 0, buildsVector);
// Collect the values that we need to extract from the tree.
for (int EIdx = 0, EE = VectorizableTree.size(); EIdx < EE; ++EIdx) {
@@ -589,7 +590,7 @@
}
-void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
+void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, bool buildsVector ) {
bool SameTy = getSameType(VL); (void)SameTy;
assert(SameTy && "Invalid types!");
@@ -712,6 +713,10 @@
if (RdxOps && RdxOps->count(UI))
continue;
+ // This user is part of building a vector
+ if (buildsVector)
+ continue;
+
// Make sure that we can schedule this unknown user.
BlockNumbering &BN = BlocksNumbers[BB];
int UserIndex = BN.getIndex(UI);
@@ -1012,8 +1017,16 @@
return 0;
}
case Instruction::ExtractElement: {
- if (CanReuseExtract(VL))
- return 0;
+ if (CanReuseExtract(VL)) {
+ int DeadCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
+ if( E->hasOneUse() )
+ // Take credit for instruction that will become dead.
+ DeadCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+ }
+ return -DeadCost;
+ }
return getGatherCost(VecTy);
}
case Instruction::ZExt:
@@ -1948,7 +1961,7 @@
/// \brief Try to vectorize a list of operands.
/// \returns true if a value was vectorized.
- bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool buildsVector=false);
/// \brief Try to vectorize a chain that may start at the operands of \V;
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2121,7 +2134,7 @@
return tryToVectorizeList(VL, R);
}
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, bool buildsVector) {
if (VL.size() < 2)
return false;
@@ -2149,7 +2162,7 @@
bool Changed = false;
- // Keep track of values that were delete by vectorizing in the loop below.
+ // Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -2171,11 +2184,11 @@
<< "\n");
ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
- R.buildTree(Ops);
+ R.buildTree(Ops,buildsVector);
int Cost = R.getTreeCost();
if (Cost < -SLPCostThreshold) {
- DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
+ DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.vectorizeTree();
// Move to the next bundle.
@@ -2417,7 +2430,7 @@
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
- V.buildTree(ValsToReduce, &ReductionOps);
+ V.buildTree(ValsToReduce, false, &ReductionOps);
// Estimate cost.
int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
@@ -2717,7 +2730,7 @@
if (!findBuildVector(IE, Ops))
continue;
- if (tryToVectorizeList(Ops, R)) {
+ if (tryToVectorizeList(Ops, R, true)) {
Changed = true;
it = BB->begin();
e = BB->end();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3143.1.patch
Type: text/x-patch
Size: 4744 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140321/11403eb1/attachment.bin>
More information about the llvm-commits
mailing list