[llvm] r272715 - [LV] Enable vectorization of loops where the IV has an external use

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 14 14:27:28 PDT 2016


Author: mkuper
Date: Tue Jun 14 16:27:27 2016
New Revision: 272715

URL: http://llvm.org/viewvc/llvm-project?rev=272715&view=rev
Log:
[LV] Enable vectorization of loops where the IV has an external use

Vectorizing loops with "escaping" IVs has been disabled since r190790, due to
PR17179. This re-enables it, with support for external use of both
"post-increment" (last iteration) and "pre-increment" (second-to-last iteration)
IVs.

Differential Revision: http://reviews.llvm.org/D21048

Added:
    llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=272715&r1=272714&r2=272715&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Jun 14 16:27:27 2016
@@ -355,6 +355,12 @@ protected:
 
   /// Create an empty loop, based on the loop ranges of the old loop.
   void createEmptyLoop();
+
+  /// Set up the values of the IVs correctly when exiting the vector loop.
+  void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
+                    Value *CountRoundDown, Value *EndValue,
+                    BasicBlock *MiddleBlock);
+
   /// Create a new induction variable inside L.
   PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
                                    Value *Step, Instruction *DL);
@@ -1433,13 +1439,11 @@ private:
   /// invariant.
   void collectStridedAccess(Value *LoadOrStoreInst);
 
-  /// \brief Returns true if we can vectorize using this PHI node as an
-  /// induction.
-  ///
   /// Updates the vectorization state by adding \p Phi to the inductions list.
   /// This can set \p Phi as the main induction of the loop if \p Phi is a
   /// better choice for the main induction than the existing one.
-  bool addInductionPhi(PHINode *Phi, InductionDescriptor ID);
+  void addInductionPhi(PHINode *Phi, InductionDescriptor ID,
+                       SmallPtrSetImpl<Value *> &AllowedExit);
 
   /// Report an analysis message to assist the user in diagnosing loops that are
   /// not vectorized.  These are handled as LoopAccessReport rather than
@@ -1493,7 +1497,7 @@ private:
   /// Holds the widest induction type encountered.
   Type *WidestIndTy;
 
-  /// Allowed outside users. This holds the reduction
+  /// Allowed outside users. This holds the induction and reduction
   /// vars which can be accessed from outside the loop.
   SmallPtrSet<Value *, 4> AllowedExit;
   /// This set holds the variables which are known to be uniform after
@@ -3219,6 +3223,9 @@ void InnerLoopVectorizer::createEmptyLoo
     // or the value at the end of the vectorized loop.
     BCResumeVal->addIncoming(EndValue, MiddleBlock);
 
+    // Fix up external users of the induction variable.
+    fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
+
     // Fix the scalar body counter (PHI node).
     unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
 
@@ -3258,6 +3265,59 @@ void InnerLoopVectorizer::createEmptyLoo
   Hints.setAlreadyVectorized();
 }
 
+// Fix up external users of the induction variable. At this point, we are
+// in LCSSA form, with all external PHIs that use the IV having one input value,
+// coming from the remainder loop. We need those PHIs to also have a correct
+// value for the IV when arriving directly from the middle block.
+void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
+                                       const InductionDescriptor &II,
+                                       Value *CountRoundDown, Value *EndValue,
+                                       BasicBlock *MiddleBlock) {
+  // There are two kinds of external IV usages - those that use the value 
+  // computed in the last iteration (the PHI) and those that use the penultimate
+  // value (the value that feeds into the phi from the loop latch).
+  // We allow both, but they, obviously, have different values.
+
+  // We only expect at most one of each kind of user. This is because LCSSA will
+  // canonicalize the users to a single PHI node per exit block, and we
+  // currently only vectorize loops with a single exit.
+  assert(OrigLoop->getExitBlock() && "Expected a single exit block");
+
+  // An external user of the last iteration's value should see the value that
+  // the remainder loop uses to initialize its own IV.
+  Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
+  for (User *U : PostInc->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (!OrigLoop->contains(UI)) {
+      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+      cast<PHINode>(UI)->addIncoming(EndValue, MiddleBlock);
+      break;
+    }
+  }
+
+  // An external user of the penultimate value needs to see EndValue - Step.
+  // The simplest way to get this is to recompute it from the constituent SCEVs,
+  // that is Start + (Step * (CRD - 1)).
+  for (User *U : OrigPhi->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (!OrigLoop->contains(UI)) {
+      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+      const DataLayout &DL =
+          OrigLoop->getHeader()->getModule()->getDataLayout();
+
+      IRBuilder<> B(MiddleBlock->getTerminator());
+      Value *CountMinusOne = B.CreateSub(
+          CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
+      Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
+                                       "cast.cmo");
+      Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
+      Escape->setName("ind.escape");      
+      cast<PHINode>(UI)->addIncoming(Escape, MiddleBlock);
+      break;
+    }
+  }
+}
+
 namespace {
 struct CSEDenseMapInfo {
   static bool canHandle(Instruction *I) {
@@ -4639,10 +4699,10 @@ static Type *getWiderType(const DataLayo
 /// \brief Check that the instruction has outside loop users and is not an
 /// identified reduction variable.
 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
-                               SmallPtrSetImpl<Value *> &Reductions) {
-  // Reduction instructions are allowed to have exit users. All other
-  // instructions must not have external users.
-  if (!Reductions.count(Inst))
+                               SmallPtrSetImpl<Value *> &AllowedExit) {
+  // Reduction and Induction instructions are allowed to have exit users. All
+  // other instructions must not have external users.
+  if (!AllowedExit.count(Inst))
     // Check that all of the users of the loop are inside the BB.
     for (User *U : Inst->users()) {
       Instruction *UI = cast<Instruction>(U);
@@ -4655,8 +4715,9 @@ static bool hasOutsideLoopUser(const Loo
   return false;
 }
 
-bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
-                                                InductionDescriptor ID) {
+void LoopVectorizationLegality::addInductionPhi(
+    PHINode *Phi, InductionDescriptor ID,
+    SmallPtrSetImpl<Value *> &AllowedExit) {
   Inductions[Phi] = ID;
   Type *PhiTy = Phi->getType();
   const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -4682,18 +4743,13 @@ bool LoopVectorizationLegality::addInduc
       Induction = Phi;
   }
 
-  DEBUG(dbgs() << "LV: Found an induction variable.\n");
+  // Both the PHI node itself, and the "post-increment" value feeding
+  // back into the PHI node may have external users.
+  AllowedExit.insert(Phi);
+  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
 
-  // Until we explicitly handle the case of an induction variable with
-  // an outside loop user we have to give up vectorizing this loop.
-  if (hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) {
-    emitAnalysis(VectorizationReport(Phi) <<
-                 "use of induction value outside of the "
-                 "loop is not handled by vectorizer");
-    return false;
-  }
-
-  return true;
+  DEBUG(dbgs() << "LV: Found an induction variable.\n");
+  return;
 }
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
@@ -4757,8 +4813,7 @@ bool LoopVectorizationLegality::canVecto
 
         InductionDescriptor ID;
         if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
-          if (!addInductionPhi(Phi, ID))
-            return false;
+          addInductionPhi(Phi, ID, AllowedExit);
           continue;
         }
 
@@ -4770,8 +4825,7 @@ bool LoopVectorizationLegality::canVecto
         // As a last resort, coerce the PHI to a AddRec expression
         // and re-try classifying it a an induction PHI.
         if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
-          if (!addInductionPhi(Phi, ID))
-            return false;
+          addInductionPhi(Phi, ID, AllowedExit);        
           continue;
         }
 

Added: llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll?rev=272715&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll Tue Jun 14 16:27:27 2016
@@ -0,0 +1,84 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s
+
+; CHECK-LABEL: @postinc
+; CHECK-LABEL: scalar.ph:
+; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %n.vec, %middle.block ]
+; CHECK: ret i32 %[[RET]]
+define i32 @postinc(i32 %k)  {
+entry:
+  br label %for.body
+
+for.body:
+  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %inc = add nsw i32 %inc.phi, 1
+  %cmp = icmp eq i32 %inc, %k
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %inc
+}
+
+; CHECK-LABEL: @preinc
+; CHECK-LABEL: middle.block:
+; CHECK: %3 = sub i32 %n.vec, 1
+; CHECK: %ind.escape = add i32 0, %3
+; CHECK-LABEL: scalar.ph:
+; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: ret i32 %[[RET]]
+define i32 @preinc(i32 %k)  {
+entry:
+  br label %for.body
+
+for.body:
+  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %inc = add nsw i32 %inc.phi, 1
+  %cmp = icmp eq i32 %inc, %k
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %inc.phi
+}
+
+; CHECK-LABEL: @constpre
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ 2, %middle.block ]
+; CHECK: ret i32 %[[RET]]
+define i32 @constpre()  {
+entry:
+  br label %for.body
+
+for.body:
+  %inc.phi = phi i32 [ 32, %entry ], [ %inc, %for.body ]
+  %inc = sub nsw i32 %inc.phi, 2
+  %cmp = icmp eq i32 %inc, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %inc.phi
+}
+
+; CHECK-LABEL: @geppre
+; CHECK-LABEL: middle.block:
+; CHECK: %ind.escape = getelementptr i32, i32* %ptr, i64 124
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32* [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: ret i32* %[[RET]]
+define i32* @geppre(i32* %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %ptr.phi = phi i32* [ %ptr, %entry ], [ %inc.ptr, %for.body ]
+  %inc = add nsw i32 %inc.phi, 1
+  %inc.ptr = getelementptr i32, i32* %ptr.phi, i32 4
+  %cmp = icmp eq i32 %inc, 32
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32* %ptr.phi
+}

Modified: llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll?rev=272715&r1=272714&r2=272715&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll Tue Jun 14 16:27:27 2016
@@ -1,7 +1,6 @@
 ; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
 
 ; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
-; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 
@@ -41,34 +40,3 @@ f1.exit.loopexit:
   %.lcssa = phi i32 [ %tmp17, %bb16 ]
   ret i32 %.lcssa
 }
-
-; Don't vectorize this loop. Its phi node (induction variable) has an outside
-; loop user. We currently don't handle this case.
-; PR17179
-
-; CHECK-LABEL: @test2(
-; CHECK-NOT:  <2 x
-
-@x1 = common global i32 0, align 4
-@x2 = common global i32 0, align 4
-@x0 = common global i32 0, align 4
-
-define i32 @test2()  {
-entry:
-  store i32 0, i32* @x1, align 4
-  %0 = load i32, i32* @x0, align 4
-  br label %for.cond1.preheader
-
-for.cond1.preheader:
-  %inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ]
-  %inc = add nsw i32 %inc7, 1
-  %cmp = icmp eq i32 %inc, 52
-  br i1 %cmp, label %for.end5, label %for.cond1.preheader
-
-for.end5:
-  %inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ]
-  %xor = xor i32 %inc7.lcssa, %0
-  store i32 52, i32* @x1, align 4
-  store i32 1, i32* @x2, align 4
-  ret i32 %xor
-}




More information about the llvm-commits mailing list