[llvm] r184674 - SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.

Nadav Rotem nrotem at apple.com
Sun Jun 23 14:57:28 PDT 2013


Author: nadav
Date: Sun Jun 23 16:57:27 2013
New Revision: 184674

URL: http://llvm.org/viewvc/llvm-project?rev=184674&view=rev
Log:
SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences.
Make sure that we only replace a gather sequence with an identical one (via RAUW) when the replacement's basic block dominates the basic block of the sequence being replaced.


Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=184674&r1=184673&r2=184674&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sun Jun 23 16:57:27 2013
@@ -127,8 +127,9 @@ public:
   static const int MAX_COST = INT_MIN;
 
   FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
-          TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li) :
-    F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li),
+          TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, 
+          DominatorTree *Dt) :
+    F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
     Builder(Se->getContext()) {
     for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
       BasicBlock *BB = it;
@@ -255,6 +256,7 @@ public:
   TargetTransformInfo *TTI;
   AliasAnalysis *AA;
   LoopInfo *LI;
+  DominatorTree *DT;
   /// Instruction builder to construct the vectorized tree.
   IRBuilder<> Builder;
 };
@@ -1197,7 +1199,8 @@ void FuncSLP::optimizeGatherSequence() {
      // visited instructions.
       for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
            ve = Visited.end(); v != ve; ++v) {
-        if (Insert->isIdenticalTo(*v)) {
+        if (Insert->isIdenticalTo(*v) &&
+          DT->dominates((*v)->getParent(), Insert->getParent())) {
           Insert->replaceAllUsesWith(*v);
           break;
         }
@@ -1224,6 +1227,7 @@ struct SLPVectorizer : public FunctionPa
   TargetTransformInfo *TTI;
   AliasAnalysis *AA;
   LoopInfo *LI;
+  DominatorTree *DT;
 
   virtual bool runOnFunction(Function &F) {
     SE = &getAnalysis<ScalarEvolution>();
@@ -1231,6 +1235,7 @@ struct SLPVectorizer : public FunctionPa
     TTI = &getAnalysis<TargetTransformInfo>();
     AA = &getAnalysis<AliasAnalysis>();
     LI = &getAnalysis<LoopInfo>();
+    DT = &getAnalysis<DominatorTree>();
 
     StoreRefs.clear();
     bool Changed = false;
@@ -1244,7 +1249,7 @@ struct SLPVectorizer : public FunctionPa
 
     // Use the bollom up slp vectorizer to construct chains that start with
     // he store instructions.
-    FuncSLP R(&F, SE, DL, TTI, AA, LI);
+    FuncSLP R(&F, SE, DL, TTI, AA, LI, DT);
 
     for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
       BasicBlock *BB = it;
@@ -1274,6 +1279,7 @@ struct SLPVectorizer : public FunctionPa
     AU.addRequired<AliasAnalysis>();
     AU.addRequired<TargetTransformInfo>();
     AU.addRequired<LoopInfo>();
+    AU.addRequired<DominatorTree>();
   }
 
 private:

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll?rev=184674&r1=184673&r2=184674&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll Sun Jun 23 16:57:27 2013
@@ -83,3 +83,54 @@ entry:
   ret i32 undef
 }
 
+; int test2(double *G, int k) {
+;   if (k) {
+;     G[0] = 1+G[5]*4;
+;     G[1] = 6+G[6]*3;
+;   } else {
+;     G[2] = 7+G[5]*4;
+;     G[3] = 8+G[6]*3;
+;   }
+; }
+
+; We can't merge the gather sequences because one does not dominate the other.
+; CHECK: test2
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: ret
+define i32 @test2(double* nocapture %G, i32 %k) {
+  %1 = icmp eq i32 %k, 0
+  %2 = getelementptr inbounds double* %G, i64 5
+  %3 = load double* %2, align 8
+  %4 = fmul double %3, 4.000000e+00
+  br i1 %1, label %12, label %5
+
+; <label>:5                                       ; preds = %0
+  %6 = fadd double %4, 1.000000e+00
+  store double %6, double* %G, align 8
+  %7 = getelementptr inbounds double* %G, i64 6
+  %8 = load double* %7, align 8
+  %9 = fmul double %8, 3.000000e+00
+  %10 = fadd double %9, 6.000000e+00
+  %11 = getelementptr inbounds double* %G, i64 1
+  store double %10, double* %11, align 8
+  br label %20
+
+; <label>:12                                      ; preds = %0
+  %13 = fadd double %4, 7.000000e+00
+  %14 = getelementptr inbounds double* %G, i64 2
+  store double %13, double* %14, align 8
+  %15 = getelementptr inbounds double* %G, i64 6
+  %16 = load double* %15, align 8
+  %17 = fmul double %16, 3.000000e+00
+  %18 = fadd double %17, 8.000000e+00
+  %19 = getelementptr inbounds double* %G, i64 3
+  store double %18, double* %19, align 8
+  br label %20
+
+; <label>:20                                      ; preds = %12, %5
+  ret i32 undef
+}
+





More information about the llvm-commits mailing list