[llvm] r272742 - Recommit [LV] Enable vectorization of loops where the IV has an external use

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 14 17:35:27 PDT 2016


Author: mkuper
Date: Tue Jun 14 19:35:26 2016
New Revision: 272742

URL: http://llvm.org/viewvc/llvm-project?rev=272742&view=rev
Log:
Recommit [LV] Enable vectorization of loops where the IV has an external use

r272715 broke libcxx because it did not correctly handle cases where the
last iteration of one IV is the second-to-last iteration of another.

Original commit message:
Vectorizing loops with "escaping" IVs has been disabled since r190790, due to
PR17179. This re-enables it, with support for external use of both
"post-increment" (last iteration) and "pre-increment" (second-to-last iteration)
IVs.

Added:
    llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll
      - copied, changed from r272729, llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=272742&r1=272741&r2=272742&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Jun 14 19:35:26 2016
@@ -355,6 +355,12 @@ protected:
 
   /// Create an empty loop, based on the loop ranges of the old loop.
   void createEmptyLoop();
+
+  /// Set up the values of the IVs correctly when exiting the vector loop.
+  void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
+                    Value *CountRoundDown, Value *EndValue,
+                    BasicBlock *MiddleBlock);
+
   /// Create a new induction variable inside L.
   PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
                                    Value *Step, Instruction *DL);
@@ -1433,13 +1439,11 @@ private:
   /// invariant.
   void collectStridedAccess(Value *LoadOrStoreInst);
 
-  /// \brief Returns true if we can vectorize using this PHI node as an
-  /// induction.
-  ///
   /// Updates the vectorization state by adding \p Phi to the inductions list.
   /// This can set \p Phi as the main induction of the loop if \p Phi is a
   /// better choice for the main induction than the existing one.
-  bool addInductionPhi(PHINode *Phi, InductionDescriptor ID);
+  void addInductionPhi(PHINode *Phi, InductionDescriptor ID,
+                       SmallPtrSetImpl<Value *> &AllowedExit);
 
   /// Report an analysis message to assist the user in diagnosing loops that are
   /// not vectorized.  These are handled as LoopAccessReport rather than
@@ -1493,7 +1497,7 @@ private:
   /// Holds the widest induction type encountered.
   Type *WidestIndTy;
 
-  /// Allowed outside users. This holds the reduction
+  /// Allowed outside users. This holds the induction and reduction
   /// vars which can be accessed from outside the loop.
   SmallPtrSet<Value *, 4> AllowedExit;
   /// This set holds the variables which are known to be uniform after
@@ -3219,6 +3223,9 @@ void InnerLoopVectorizer::createEmptyLoo
     // or the value at the end of the vectorized loop.
     BCResumeVal->addIncoming(EndValue, MiddleBlock);
 
+    // Fix up external users of the induction variable.
+    fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);
+
     // Fix the scalar body counter (PHI node).
     unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
 
@@ -3258,6 +3265,71 @@ void InnerLoopVectorizer::createEmptyLoo
   Hints.setAlreadyVectorized();
 }
 
+// Fix up external users of the induction variable. At this point, we are
+// in LCSSA form, with all external PHIs that use the IV having one input value,
+// coming from the remainder loop. We need those PHIs to also have a correct
+// value for the IV when arriving directly from the middle block.
+void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
+                                       const InductionDescriptor &II,
+                                       Value *CountRoundDown, Value *EndValue,
+                                       BasicBlock *MiddleBlock) {
+  // There are two kinds of external IV usages - those that use the value
+  // computed in the last iteration (the value that feeds into the phi from the
+  // loop latch) and those that use the penultimate value (the PHI).
+  // We allow both, but they, obviously, have different values.
+
+  // We only expect at most one of each kind of user. This is because LCSSA will
+  // canonicalize the users to a single PHI node per exit block, and we
+  // currently only vectorize loops with a single exit.
+  assert(OrigLoop->getExitBlock() && "Expected a single exit block");
+
+  // An external user of the last iteration's value should see the value that
+  // the remainder loop uses to initialize its own IV.
+  Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
+  for (User *U : PostInc->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (!OrigLoop->contains(UI)) {
+      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+      // One corner case we have to handle is two IVs "chasing" each other,
+      // that is %IV2 = phi [...], [ %IV1, %latch ]
+      // In this case, if IV1 has an external use, we need to avoid adding both      
+      // "last value of IV1" and "penultimate value of IV2". Since we don't know
+      // which IV will be handled first, check we haven't handled this user yet.
+      PHINode *User = cast<PHINode>(UI);
+      if (User->getBasicBlockIndex(MiddleBlock) == -1)
+        User->addIncoming(EndValue, MiddleBlock);
+      break;
+    }
+  }
+
+  // An external user of the penultimate value needs to see EndValue - Step.
+  // The simplest way to get this is to recompute it from the constituent SCEVs,
+  // that is Start + (Step * (CRD - 1)).
+  for (User *U : OrigPhi->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    if (!OrigLoop->contains(UI)) {
+      const DataLayout &DL =
+          OrigLoop->getHeader()->getModule()->getDataLayout();
+
+      assert(isa<PHINode>(UI) && "Expected LCSSA form");
+      PHINode *User = cast<PHINode>(UI);
+      // As above, check we haven't already handled this user.
+      if (User->getBasicBlockIndex(MiddleBlock) != -1)
+        break;
+
+      IRBuilder<> B(MiddleBlock->getTerminator());
+      Value *CountMinusOne = B.CreateSub(
+          CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
+      Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
+                                       "cast.cmo");
+      Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
+      Escape->setName("ind.escape");      
+      User->addIncoming(Escape, MiddleBlock);
+      break;
+    }
+  }
+}
+
 namespace {
 struct CSEDenseMapInfo {
   static bool canHandle(Instruction *I) {
@@ -4639,10 +4711,10 @@ static Type *getWiderType(const DataLayo
 /// \brief Check that the instruction has outside loop users and is not an
 /// identified reduction variable.
 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
-                               SmallPtrSetImpl<Value *> &Reductions) {
-  // Reduction instructions are allowed to have exit users. All other
-  // instructions must not have external users.
-  if (!Reductions.count(Inst))
+                               SmallPtrSetImpl<Value *> &AllowedExit) {
+  // Reduction and Induction instructions are allowed to have exit users. All
+  // other instructions must not have external users.
+  if (!AllowedExit.count(Inst))
     // Check that all of the users of the loop are inside the BB.
     for (User *U : Inst->users()) {
       Instruction *UI = cast<Instruction>(U);
@@ -4655,8 +4727,9 @@ static bool hasOutsideLoopUser(const Loo
   return false;
 }
 
-bool LoopVectorizationLegality::addInductionPhi(PHINode *Phi,
-                                                InductionDescriptor ID) {
+void LoopVectorizationLegality::addInductionPhi(
+    PHINode *Phi, InductionDescriptor ID,
+    SmallPtrSetImpl<Value *> &AllowedExit) {
   Inductions[Phi] = ID;
   Type *PhiTy = Phi->getType();
   const DataLayout &DL = Phi->getModule()->getDataLayout();
@@ -4682,18 +4755,13 @@ bool LoopVectorizationLegality::addInduc
       Induction = Phi;
   }
 
-  DEBUG(dbgs() << "LV: Found an induction variable.\n");
-
-  // Until we explicitly handle the case of an induction variable with
-  // an outside loop user we have to give up vectorizing this loop.
-  if (hasOutsideLoopUser(TheLoop, Phi, AllowedExit)) {
-    emitAnalysis(VectorizationReport(Phi) <<
-                 "use of induction value outside of the "
-                 "loop is not handled by vectorizer");
-    return false;
-  }
+  // Both the PHI node itself, and the "post-increment" value feeding
+  // back into the PHI node may have external users.
+  AllowedExit.insert(Phi);
+  AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
 
-  return true;
+  DEBUG(dbgs() << "LV: Found an induction variable.\n");
+  return;
 }
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
@@ -4757,8 +4825,7 @@ bool LoopVectorizationLegality::canVecto
 
         InductionDescriptor ID;
         if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
-          if (!addInductionPhi(Phi, ID))
-            return false;
+          addInductionPhi(Phi, ID, AllowedExit);
           continue;
         }
 
@@ -4770,8 +4837,7 @@ bool LoopVectorizationLegality::canVecto
         // As a last resort, coerce the PHI to a AddRec expression
         // and re-try classifying it a an induction PHI.
         if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
-          if (!addInductionPhi(Phi, ID))
-            return false;
+          addInductionPhi(Phi, ID, AllowedExit);        
           continue;
         }
 

Copied: llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll (from r272729, llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll?p2=llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll&p1=llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll&r1=272729&r2=272742&rev=272742&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/iv_outside_user.ll Tue Jun 14 19:35:26 2016
@@ -82,3 +82,29 @@ for.body:
 for.end:
   ret i32* %ptr.phi
 }
+
+; CHECK-LABEL: @both
+; CHECK-LABEL: middle.block:
+; CHECK: %[[END:.*]] = sub i64 %n.vec, 1
+; CHECK: %ind.escape = getelementptr i32, i32* %base, i64 %[[END]]
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32* [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: ret i32* %[[RET]]
+
+define i32* @both(i32 %k)  {
+entry:
+  %base = getelementptr inbounds i32, i32* undef, i64 1
+  br label %for.body
+
+for.body:
+  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %inc.lag1 = phi i32* [ %base, %entry ], [ %tmp, %for.body]
+  %inc.lag2 = phi i32* [ undef, %entry ], [ %inc.lag1, %for.body]  
+  %tmp = getelementptr inbounds i32, i32* %inc.lag1, i64 1    
+  %inc = add nsw i32 %inc.phi, 1
+  %cmp = icmp eq i32 %inc, %k
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32* %inc.lag1
+}

Modified: llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll?rev=272742&r1=272741&r2=272742&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/no_outside_user.ll Tue Jun 14 19:35:26 2016
@@ -1,7 +1,6 @@
 ; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
 
 ; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
-; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 
@@ -41,34 +40,3 @@ f1.exit.loopexit:
   %.lcssa = phi i32 [ %tmp17, %bb16 ]
   ret i32 %.lcssa
 }
-
-; Don't vectorize this loop. Its phi node (induction variable) has an outside
-; loop user. We currently don't handle this case.
-; PR17179
-
-; CHECK-LABEL: @test2(
-; CHECK-NOT:  <2 x
-
-@x1 = common global i32 0, align 4
-@x2 = common global i32 0, align 4
-@x0 = common global i32 0, align 4
-
-define i32 @test2()  {
-entry:
-  store i32 0, i32* @x1, align 4
-  %0 = load i32, i32* @x0, align 4
-  br label %for.cond1.preheader
-
-for.cond1.preheader:
-  %inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ]
-  %inc = add nsw i32 %inc7, 1
-  %cmp = icmp eq i32 %inc, 52
-  br i1 %cmp, label %for.end5, label %for.cond1.preheader
-
-for.end5:
-  %inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ]
-  %xor = xor i32 %inc7.lcssa, %0
-  store i32 52, i32* @x1, align 4
-  store i32 1, i32* @x2, align 4
-  ret i32 %xor
-}




More information about the llvm-commits mailing list