[llvm] r299985 - [LV] Avoid vectorizing first order recurrence when phi uses are outside loop

Anna Thomas via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 11 14:02:01 PDT 2017


Author: annat
Date: Tue Apr 11 16:02:00 2017
New Revision: 299985

URL: http://llvm.org/viewvc/llvm-project?rev=299985&view=rev
Log:
[LV] Avoid vectorizing first order recurrence when phi uses are outside loop

In the vectorization of first order recurrence, we vectorize such
that the last element in the vector will be the one extracted to pass into the
scalar remainder loop. However, this is not true when there is a phi (other
than the primary induction variable) is used outside the loop.
In such a case, we need the value from the second last iteration (i.e.
the phi value), not the last iteration (which would be the phi update).
I've added a test case for this. Also see PR32396.

A follow up patch would generate the correct code gen for such cases,
and turn this vectorization on.

Differential Revision: https://reviews.llvm.org/D31910

Reviewers: mssimpso

Modified:
    llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
    llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll

Modified: llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp?rev=299985&r1=299984&r2=299985&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp Tue Apr 11 16:02:00 2017
@@ -553,13 +553,23 @@ bool RecurrenceDescriptor::isFirstOrderR
   if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
     return false;
 
-  // Ensure every user of the phi node is dominated by the previous value. The
-  // dominance requirement ensures the loop vectorizer will not need to
-  // vectorize the initial value prior to the first iteration of the loop.
   for (User *U : Phi->users())
-    if (auto *I = dyn_cast<Instruction>(U))
+    if (auto *I = dyn_cast<Instruction>(U)) {
+      // Ensure every user of the phi node is dominated by the previous value.
+      // The dominance requirement ensures the loop vectorizer will not need to
+      // vectorize the initial value prior to the first iteration of the loop.
       if (!DT->dominates(Previous, I))
         return false;
+      // When the phi node has users outside the loop, the current logic for
+      // fixFirstOrderRecurrences may generate incorrect code. Specifically, we
+      // extract the last element from the vectorized phi, which would be the
+      // update to the phi before exiting the loop. However, what we want is the
+      // previous phi value before the update (i.e. the second last update
+      // before end of the vectorized loop).
+      // See added test cases in first-order-recurrence.ll
+      if (!TheLoop->contains(I))
+        return false;
+    }
 
   return true;
 }

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=299985&r1=299984&r2=299985&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Apr 11 16:02:00 2017
@@ -4069,7 +4069,12 @@ void InnerLoopVectorizer::fixFirstOrderR
   VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
 
   // Extract the last vector element in the middle block. This will be the
-  // initial value for the recurrence when jumping to the scalar loop.
+  // initial value for the recurrence when jumping to the scalar loop. 
+  // FIXME: Note that the last vector element need not always be the correct one:
+  // consider a loop  where we have phi uses outside the loop - we need the
+  // second last iteration value and not the last one). For now, we avoid
+  // considering such cases as firstOrderRecurrences (see
+  // isFirstOrderRecurrence).
   auto *Extract = Incoming;
   if (VF > 1) {
     Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());

Modified: llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll?rev=299985&r1=299984&r2=299985&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll Tue Apr 11 16:02:00 2017
@@ -235,10 +235,13 @@ for.body:
 }
 
 ; CHECK-LABEL: @add_phifail2(
-; CHECK: load <16 x i8>, <16 x i8>*
-; CHECK: add nuw nsw <16 x i32>
-; CHECK: store <16 x i8>
+; CHECK-NOT: load <16 x i8>, <16 x i8>*
+; CHECK-NOT: add nuw nsw <16 x i32>
+; CHECK-NOT: store <16 x i8>
 ; Function Attrs: nounwind
+; FIXME: Currently, if we vectorize this loop, we will generate incorrect code
+; if %len evenly divides VF. Vectorized loop code gen returns a_phi = p[len -1],
+; whereas it should be the previous value a_phi = p[len -2]
 define i8 @add_phifail2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
 entry:
   br label %for.body

Modified: llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll?rev=299985&r1=299984&r2=299985&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll Tue Apr 11 16:02:00 2017
@@ -349,3 +349,25 @@ scalar.body:
 for.end:
   ret void
 }
+
+; FIXME: we can vectorize this first order recurrence, by generating two
+; extracts - one for the phi `val.phi` and other for the phi update `addx`.
+; val.phi at end of loop is 94 + x.
+; CHECK-LABEL: extract_second_last_iteration
+; CHECK-NOT: vector.body
+define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
+entry:
+  br label %for.body
+
+for.body:
+  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %val.phi = phi i32 [ 0, %entry ], [ %addx, %for.body ]
+  %inc = add i32 %inc.phi, 1
+  %bc = zext i32 %inc.phi to i64
+  %addx = add i32 %inc.phi, %x
+  %cmp = icmp eq i32 %inc.phi, 95
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %val.phi
+}




More information about the llvm-commits mailing list