[llvm-branch-commits] [llvm-branch] r167993 - in /llvm/branches/release_32: ./ lib/Transforms/Vectorize/BBVectorize.cpp test/Transforms/BBVectorize/X86/cmp-types.ll test/Transforms/BBVectorize/X86/sh-rec.ll test/Transforms/BBVectorize/X86/sh-rec2.ll test/Transforms/BBVectorize/X86/sh-rec3.ll test/Transforms/BBVectorize/X86/sh-types.ll
Hal Finkel
hfinkel at anl.gov
Wed Nov 14 14:58:30 PST 2012
Author: hfinkel
Date: Wed Nov 14 16:58:30 2012
New Revision: 167993
URL: http://llvm.org/viewvc/llvm-project?rev=167993&view=rev
Log:
Merge BBVectorizer changes r167731, r167743, r167750, r167784, r167811, r167817.
These changes fix a serious interaction problem with the cost model on x86 that
could cause the vectorizer to enter an infinite loop (and sometimes crash in
other ways).
Added:
llvm/branches/release_32/test/Transforms/BBVectorize/X86/cmp-types.ll
- copied unchanged from r167743, llvm/trunk/test/Transforms/BBVectorize/X86/cmp-types.ll
llvm/branches/release_32/test/Transforms/BBVectorize/X86/sh-rec.ll
- copied unchanged from r167750, llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec.ll
llvm/branches/release_32/test/Transforms/BBVectorize/X86/sh-rec2.ll
- copied unchanged from r167784, llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec2.ll
llvm/branches/release_32/test/Transforms/BBVectorize/X86/sh-rec3.ll
- copied unchanged from r167811, llvm/trunk/test/Transforms/BBVectorize/X86/sh-rec3.ll
llvm/branches/release_32/test/Transforms/BBVectorize/X86/sh-types.ll
- copied unchanged from r167731, llvm/trunk/test/Transforms/BBVectorize/X86/sh-types.ll
Modified:
llvm/branches/release_32/ (props changed)
llvm/branches/release_32/lib/Transforms/Vectorize/BBVectorize.cpp
Propchange: llvm/branches/release_32/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Nov 14 16:58:30 2012
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241
+/llvm/trunk:155241,167731,167743,167750,167784,167811,167817
Modified: llvm/branches/release_32/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_32/lib/Transforms/Vectorize/BBVectorize.cpp?rev=167993&r1=167992&r2=167993&view=diff
==============================================================================
--- llvm/branches/release_32/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/branches/release_32/lib/Transforms/Vectorize/BBVectorize.cpp Wed Nov 14 16:58:30 2012
@@ -28,6 +28,7 @@
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -483,6 +484,10 @@
if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
T2 = SI->getCondition()->getType();
+ } else if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(I)) {
+ T2 = SI->getOperand(0)->getType();
+ } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+ T2 = CI->getOperand(0)->getType();
}
}
@@ -671,6 +676,19 @@
return false;
}
+
+ bool isPureIEChain(InsertElementInst *IE) {
+ InsertElementInst *IENext = IE;
+ do {
+ if (!isa<UndefValue>(IENext->getOperand(0)) &&
+ !isa<InsertElementInst>(IENext->getOperand(0))) {
+ return false;
+ }
+ } while ((IENext =
+ dyn_cast<InsertElementInst>(IENext->getOperand(0))));
+
+ return true;
+ }
};
// This function implements one vectorization iteration on the provided
@@ -987,10 +1005,11 @@
// We don't want to fuse to a type that will be split, even
// if the two input types will also be split and there is no other
// associated cost.
- unsigned VParts = VTTI->getNumberOfParts(VT1);
- if (VParts > 1)
+ unsigned VParts1 = VTTI->getNumberOfParts(VT1),
+ VParts2 = VTTI->getNumberOfParts(VT2);
+ if (VParts1 > 1 || VParts2 > 1)
return false;
- else if (!VParts && VCost == ICost + JCost)
+ else if ((!VParts1 || !VParts2) && VCost == ICost + JCost)
return false;
CostSavings = ICost + JCost - VCost;
@@ -1683,10 +1702,20 @@
// The set of pairs that have already contributed to the total cost.
DenseSet<ValuePair> IncomingPairs;
+ // If the cost model were perfect, this might not be necessary; but we
+ // need to make sure that we don't get stuck vectorizing our own
+ // shuffle chains.
+ bool HasNontrivialInsts = false;
+
// The node weights represent the cost savings associated with
// fusing the pair of instructions.
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
E = PrunedTree.end(); S != E; ++S) {
+ if (!isa<ShuffleVectorInst>(S->first) &&
+ !isa<InsertElementInst>(S->first) &&
+ !isa<ExtractElementInst>(S->first))
+ HasNontrivialInsts = true;
+
bool FlipOrder = false;
if (getDepthFactor(S->first)) {
@@ -1760,9 +1789,12 @@
bool NeedsExtraction = false;
for (Value::use_iterator I = S->first->use_begin(),
IE = S->first->use_end(); I != IE; ++I) {
- if (isa<ShuffleVectorInst>(*I) ||
- isa<InsertElementInst>(*I) ||
- isa<ExtractElementInst>(*I))
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
continue;
if (PrunedTreeInstrs.count(*I))
continue;
@@ -1787,9 +1819,12 @@
NeedsExtraction = false;
for (Value::use_iterator I = S->second->use_begin(),
IE = S->second->use_end(); I != IE; ++I) {
- if (isa<ShuffleVectorInst>(*I) ||
- isa<InsertElementInst>(*I) ||
- isa<ExtractElementInst>(*I))
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ // Shuffle can be folded if it has no other input
+ if (isa<UndefValue>(SI->getOperand(1)))
+ continue;
+ }
+ if (isa<ExtractElementInst>(*I))
continue;
if (PrunedTreeInstrs.count(*I))
continue;
@@ -1839,14 +1874,37 @@
// Combining vector operations of the same type is also assumed
// folded with other operations.
- if (Ty1 == Ty2 &&
- (isa<ShuffleVectorInst>(O1) ||
- isa<InsertElementInst>(O1) ||
- isa<InsertElementInst>(O1)) &&
- (isa<ShuffleVectorInst>(O2) ||
- isa<InsertElementInst>(O2) ||
- isa<InsertElementInst>(O2)))
- continue;
+ if (Ty1 == Ty2) {
+ // If both are insert elements, then both can be widened.
+ InsertElementInst *IEO1 = dyn_cast<InsertElementInst>(O1),
+ *IEO2 = dyn_cast<InsertElementInst>(O2);
+ if (IEO1 && IEO2 && isPureIEChain(IEO1) && isPureIEChain(IEO2))
+ continue;
+ // If both are extract elements, and both have the same input
+ // type, then they can be replaced with a shuffle
+ ExtractElementInst *EIO1 = dyn_cast<ExtractElementInst>(O1),
+ *EIO2 = dyn_cast<ExtractElementInst>(O2);
+ if (EIO1 && EIO2 &&
+ EIO1->getOperand(0)->getType() ==
+ EIO2->getOperand(0)->getType())
+ continue;
+ // If both are a shuffle with equal operand types and only two
+ // unique operands, then they can be replaced with a single
+ // shuffle
+ ShuffleVectorInst *SIO1 = dyn_cast<ShuffleVectorInst>(O1),
+ *SIO2 = dyn_cast<ShuffleVectorInst>(O2);
+ if (SIO1 && SIO2 &&
+ SIO1->getOperand(0)->getType() ==
+ SIO2->getOperand(0)->getType()) {
+ SmallSet<Value *, 4> SIOps;
+ SIOps.insert(SIO1->getOperand(0));
+ SIOps.insert(SIO1->getOperand(1));
+ SIOps.insert(SIO2->getOperand(0));
+ SIOps.insert(SIO2->getOperand(1));
+ if (SIOps.size() <= 2)
+ continue;
+ }
+ }
int ESContrib;
// This pair has already been formed.
@@ -1894,6 +1952,13 @@
}
}
}
+
+ if (!HasNontrivialInsts) {
+ DEBUG(if (DebugPairSelection) dbgs() <<
+ "\tNo non-trivial instructions in tree;"
+ " override to zero effective size\n");
+ EffSize = 0;
+ }
} else {
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
E = PrunedTree.end(); S != E; ++S)
@@ -2092,18 +2157,7 @@
if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) {
// If we have a pure insertelement chain, then this can be rewritten
// into a chain that directly builds the larger type.
- bool PureChain = true;
- InsertElementInst *LIENext = LIE;
- do {
- if (!isa<UndefValue>(LIENext->getOperand(0)) &&
- !isa<InsertElementInst>(LIENext->getOperand(0))) {
- PureChain = false;
- break;
- }
- } while ((LIENext =
- dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
-
- if (PureChain) {
+ if (isPureIEChain(LIE)) {
SmallVector<Value *, 8> VectElemts(numElemL,
UndefValue::get(ArgTypeL->getScalarType()));
InsertElementInst *LIENext = LIE;
More information about the llvm-branch-commits
mailing list