[llvm] r292456 - [LV] Run loop-simplify and LCSSA explicitly instead of "requiring" them

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 18 16:42:29 PST 2017


Author: mkuper
Date: Wed Jan 18 18:42:28 2017
New Revision: 292456

URL: http://llvm.org/viewvc/llvm-project?rev=292456&view=rev
Log:
[LV] Run loop-simplify and LCSSA explicitly instead of "requiring" them

This changes the vectorizer to explicitly use the loopsimplify and lcssa utils,
instead of "requiring" the transformations as if they were analyses.

This is not NFC, since it changes the LCSSA behavior - we no longer run LCSSA
for all loops, but rather only for the loops we expect to modify.

Differential Revision: https://reviews.llvm.org/D28868


Added:
    llvm/trunk/test/Transforms/LoopVectorize/partial-lcssa.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=292456&r1=292455&r2=292456&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Jan 18 18:42:28 2017
@@ -92,6 +92,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/Transforms/Vectorize.h"
@@ -2134,8 +2135,6 @@ struct LoopVectorize : public FunctionPa
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequiredID(LoopSimplifyID);
-    AU.addRequiredID(LCSSAID);
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
@@ -7169,9 +7168,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCac
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
 INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
@@ -7543,6 +7540,8 @@ bool LoopVectorizePass::processLoop(Loop
     DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
   }
 
+  formLCSSARecursively(*L, *DT, LI, SE);
+
   using namespace ore;
   if (!VectorizeLoop) {
     assert(IC > 1 && "interleave count should not be 1 or 0");
@@ -7618,6 +7617,16 @@ bool LoopVectorizePass::runImpl(
   if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
     return false;
 
+  bool Changed = false;
+
+  // The vectorizer requires loops to be in simplified form.
+  // Since simplification may add new inner loops, it has to run before the
+  // legality and profitability checks. This means running the loop vectorizer
+  // will simplify all loops, regardless of whether anything ends up being
+  // vectorized.
+  for (auto &L : *LI)
+    Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
+
   // Build up a worklist of inner-loops to vectorize. This is necessary as
   // the act of vectorizing or partially unrolling a loop creates new loops
   // and can invalidate iterators across the loops.
@@ -7629,7 +7638,6 @@ bool LoopVectorizePass::runImpl(
   LoopsAnalyzed += Worklist.size();
 
   // Now walk the identified inner loops.
-  bool Changed = false;
   while (!Worklist.empty())
     Changed |= processLoop(Worklist.pop_back_val());
 

Added: llvm/trunk/test/Transforms/LoopVectorize/partial-lcssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/partial-lcssa.ll?rev=292456&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/partial-lcssa.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/partial-lcssa.ll Wed Jan 18 18:42:28 2017
@@ -0,0 +1,54 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; We vectorize the inner loop, so we have to put it in LCSSA form.
+; However, there's no reason to touch the outer loop.
+
+; CHECK-LABEL: @foo
+; CHECK-LABEL: for.end.inner.loopexit:
+; CHECK: %[[LCSSAPHI:.*]] = phi i64 [ %indvars.iv, %for.body.inner ], [ %{{.*}}, %middle.block ]
+; CHECK: store i64 %[[LCSSAPHI]], i64* %O1, align 4
+; CHECK-LABEL: for.end.outer.loopexit
+; CHECK: store i64 %indvars.outer, i64* %O2, align 4
+
+
+define i64 @foo(i32* nocapture %A, i32* nocapture %B, i64 %n, i64 %m, i64* %O1, i64* %O2) {
+entry:
+  %cmp = icmp sgt i64 %n, 0
+  br i1 %cmp, label %for.body.outer.preheader, label %for.end.outer
+
+for.body.outer.preheader:                         ; preds = %entry
+  br label %for.body.outer
+
+for.body.outer:                                   ; preds = %for.body.outer.preheader, %for.end.inner
+  %indvars.outer = phi i64 [ %indvars.outer.next, %for.end.inner ], [ 0, %for.body.outer.preheader ]
+  %cmp2 = icmp sgt i64 %m, 0
+  br i1 %cmp2, label %for.body.inner.preheader, label %for.end.inner
+
+for.body.inner.preheader:                         ; preds = %for.body.outer
+  br label %for.body.inner
+
+for.body.inner:                                   ; preds = %for.body.inner.preheader, %for.body.inner
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body.inner ], [ 0, %for.body.inner.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %v = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  store i32 %v, i32* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv, %n
+  br i1 %exitcond, label %for.end.inner.loopexit, label %for.body.inner
+
+for.end.inner.loopexit:                           ; preds = %for.body.inner
+  store i64 %indvars.iv, i64 *%O1, align 4
+  br label %for.end.inner
+
+for.end.inner:                                    ; preds = %for.end.inner.loopexit, %for.body.outer
+  %indvars.outer.next = add i64 %indvars.outer, 1
+  %exitcond.outer = icmp eq i64 %indvars.outer, %m
+  br i1 %exitcond.outer, label %for.end.outer.loopexit, label %for.body.outer
+
+for.end.outer.loopexit:                           ; preds = %for.end.inner
+  store i64 %indvars.outer, i64 *%O2, align 4
+  br label %for.end.outer
+
+for.end.outer:                                    ; preds = %for.end.outer.loopexit, %entry
+  ret i64 undef
+}

Modified: llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll?rev=292456&r1=292455&r2=292456&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr31190.ll Wed Jan 18 18:42:28 2017
@@ -9,13 +9,6 @@
 ; Since %inc54 is the IV of the outer loop, and %0 equivalent to it,
 ; we get the situation described above.
 
-; This test uses the new PM, because with the old PM, running loop-vectorize
-; would explicitly run loop-simplify. Even though this loop is already in
-; simplified form, loop-simplify would still clean up the phi.
-; The reason this matters is that in a real optimizer pipeline, LICM can create
-; such PHIs, and since it preserves loop simplified form, the cleanup has
-; no chance to run.
-
 ; Code that leads to this situation can look something like:
 ;
 ; int a, b[1], c;
@@ -28,11 +21,14 @@
 ;
 ; The PHI is an artifact of the register promotion of c.
 
+; Note that we can no longer get the vectorizer to actually see such PHIs,
+; because LV now simplifies the loop internally, but the test is still
+; useful as a regression test, and in case loop-simplify behavior changes.
+
 @c = external global i32, align 4
 @a = external global i32, align 4
 @b = external global [1 x i32], align 4
 
-; CHECK: LV: PHI is a recurrence with respect to an outer loop.
 ; CHECK: LV: Not vectorizing: Cannot prove legality.
 ; CHECK-LABEL: @test
 define void @test() {




More information about the llvm-commits mailing list