[llvm] r184674 - SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.
Nadav Rotem
nrotem at apple.com
Sun Jun 23 14:57:28 PDT 2013
Author: nadav
Date: Sun Jun 23 16:57:27 2013
New Revision: 184674
URL: http://llvm.org/viewvc/llvm-project?rev=184674&view=rev
Log:
SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.
Make sure that we don't replace and RAUW two sequences if one does not dominate the other.
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=184674&r1=184673&r2=184674&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sun Jun 23 16:57:27 2013
@@ -127,8 +127,9 @@ public:
static const int MAX_COST = INT_MIN;
FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
- TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li) :
- F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li),
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
+ DominatorTree *Dt) :
+ F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
Builder(Se->getContext()) {
for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
BasicBlock *BB = it;
@@ -255,6 +256,7 @@ public:
TargetTransformInfo *TTI;
AliasAnalysis *AA;
LoopInfo *LI;
+ DominatorTree *DT;
/// Instruction builder to construct the vectorized tree.
IRBuilder<> Builder;
};
@@ -1197,7 +1199,8 @@ void FuncSLP::optimizeGatherSequence() {
// visited instructions.
for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
ve = Visited.end(); v != ve; ++v) {
- if (Insert->isIdenticalTo(*v)) {
+ if (Insert->isIdenticalTo(*v) &&
+ DT->dominates((*v)->getParent(), Insert->getParent())) {
Insert->replaceAllUsesWith(*v);
break;
}
@@ -1224,6 +1227,7 @@ struct SLPVectorizer : public FunctionPa
TargetTransformInfo *TTI;
AliasAnalysis *AA;
LoopInfo *LI;
+ DominatorTree *DT;
virtual bool runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolution>();
@@ -1231,6 +1235,7 @@ struct SLPVectorizer : public FunctionPa
TTI = &getAnalysis<TargetTransformInfo>();
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTree>();
StoreRefs.clear();
bool Changed = false;
@@ -1244,7 +1249,7 @@ struct SLPVectorizer : public FunctionPa
// Use the bollom up slp vectorizer to construct chains that start with
// he store instructions.
- FuncSLP R(&F, SE, DL, TTI, AA, LI);
+ FuncSLP R(&F, SE, DL, TTI, AA, LI, DT);
for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
BasicBlock *BB = it;
@@ -1274,6 +1279,7 @@ struct SLPVectorizer : public FunctionPa
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetTransformInfo>();
AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTree>();
}
private:
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll?rev=184674&r1=184673&r2=184674&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll Sun Jun 23 16:57:27 2013
@@ -83,3 +83,54 @@ entry:
ret i32 undef
}
+; int test2(double *G, int k) {
+; if (k) {
+; G[0] = 1+G[5]*4;
+; G[1] = 6+G[6]*3;
+; } else {
+; G[2] = 7+G[5]*4;
+; G[3] = 8+G[6]*3;
+; }
+; }
+
+; We can't merge the gather sequences because one does not dominate the other.
+; CHECK: test2
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: ret
+define i32 @test2(double* nocapture %G, i32 %k) {
+ %1 = icmp eq i32 %k, 0
+ %2 = getelementptr inbounds double* %G, i64 5
+ %3 = load double* %2, align 8
+ %4 = fmul double %3, 4.000000e+00
+ br i1 %1, label %12, label %5
+
+; <label>:5 ; preds = %0
+ %6 = fadd double %4, 1.000000e+00
+ store double %6, double* %G, align 8
+ %7 = getelementptr inbounds double* %G, i64 6
+ %8 = load double* %7, align 8
+ %9 = fmul double %8, 3.000000e+00
+ %10 = fadd double %9, 6.000000e+00
+ %11 = getelementptr inbounds double* %G, i64 1
+ store double %10, double* %11, align 8
+ br label %20
+
+; <label>:12 ; preds = %0
+ %13 = fadd double %4, 7.000000e+00
+ %14 = getelementptr inbounds double* %G, i64 2
+ store double %13, double* %14, align 8
+ %15 = getelementptr inbounds double* %G, i64 6
+ %16 = load double* %15, align 8
+ %17 = fmul double %16, 3.000000e+00
+ %18 = fadd double %17, 8.000000e+00
+ %19 = getelementptr inbounds double* %G, i64 3
+ store double %18, double* %19, align 8
+ br label %20
+
+; <label>:20 ; preds = %12, %5
+ ret i32 undef
+}
+
More information about the llvm-commits
mailing list