[llvm] r314981 - [LV] Fix PR34711 - widen instruction ranges when sinking casts

Ayal Zaks via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 05:41:49 PDT 2017


Author: ayalz
Date: Thu Oct  5 05:41:49 2017
New Revision: 314981

URL: http://llvm.org/viewvc/llvm-project?rev=314981&view=rev
Log:
[LV] Fix PR34711 - widen instruction ranges when sinking casts

Instead of trying to keep LastWidenRecipe updated after creating each recipe,
have tryToWiden() retrieve the last recipe of the current VPBasicBlock and
check whether it is a VPWidenRecipe before attempting to extend its range.
Extending an existing recipe is an optimization that relies on the original
instruction order being preserved, so it must be applied only when the
instructions indeed keep their relative order. The latter does not always
hold, e.g., when a cast needs to sink to unravel a first-order recurrence
(r306884).

Testcase derived from reproducer of PR34711.

Differential Revision: https://reviews.llvm.org/D38339
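
To illustrate the idea behind the fix outside of LLVM, here is a minimal,
self-contained C++ sketch. The Recipe, WidenRecipe, ReplicateRecipe and Block
types below are toy stand-ins, not the VPlan classes; only the shape of the
logic mirrors the change in the diff below, namely merging an instruction into
the block's last recipe only when that last recipe really is a widen recipe:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Recipe {
  virtual ~Recipe() = default;
  virtual std::string str() const = 0;
};

struct WidenRecipe : Recipe {
  std::vector<std::string> Instrs; // instructions widened together
  std::string str() const override {
    std::string S = "widen{";
    for (const auto &I : Instrs)
      S += " " + I + ";";
    return S + " }";
  }
};

struct ReplicateRecipe : Recipe {
  std::string Instr; // a scalarized (replicated) instruction
  std::string str() const override { return "replicate{ " + Instr + " }"; }
};

struct Block {
  std::vector<std::unique_ptr<Recipe>> Recipes;

  // Mirrors the fixed tryToWiden(): extend the block's *actual* last recipe
  // only if it is a widen recipe; otherwise open a new one. Any recipe
  // appended in between naturally blocks the merge.
  void widen(const std::string &Instr) {
    if (!Recipes.empty())
      if (auto *Last = dynamic_cast<WidenRecipe *>(Recipes.back().get())) {
        Last->Instrs.push_back(Instr);
        return;
      }
    auto R = std::make_unique<WidenRecipe>();
    R->Instrs.push_back(Instr);
    Recipes.push_back(std::move(R));
  }

  void replicate(const std::string &Instr) {
    auto R = std::make_unique<ReplicateRecipe>();
    R->Instr = Instr;
    Recipes.push_back(std::move(R));
  }
};

int main() {
  Block B;
  B.widen("store i32 7");     // 1st instruction: widened
  B.replicate("load i16");    // recurring load: replicated, processed first
  B.widen("sext i16 to i32"); // the sunk cast: must NOT merge into the first
                              // widen recipe, since the load sits in between
  for (const auto &R : B.Recipes)
    std::cout << R->str() << "\n";
  // Prints three separate recipes. Tracking a stale LastWidenRecipe pointer
  // instead would have merged the cast into the first widen recipe,
  // effectively reordering it back before the load.
}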

Modified:
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=314981&r1=314980&r2=314981&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Oct  5 05:41:49 2017
@@ -2215,14 +2215,13 @@ private:
   VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
                                                         VFRange &Range);
 
-  /// Check if \I can be widened within the given VF \p Range. If \I can be
-  /// widened for Range.Start, extend \p LastWidenRecipe to include \p I if
-  /// possible or else build a new VPWidenRecipe for it, and return the
-  /// VPWidenRecipe that includes \p I. If \p I cannot be widened for
-  /// Range.Start \return null. Range.End may be decreased to ensure same
-  /// decision from \p Range.Start to \p Range.End.
-  VPWidenRecipe *tryToWiden(Instruction *I, VPWidenRecipe *LastWidenRecipe,
-                            VFRange &Range);
+  /// Check if \p I can be widened within the given VF \p Range. If \p I can be
+  /// widened for \p Range.Start, check if the last recipe of \p VPBB can be
+  /// extended to include \p I or else build a new VPWidenRecipe for it and
+  /// append it to \p VPBB. Return true if \p I can be widened for Range.Start,
+  /// false otherwise. Range.End may be decreased to ensure same decision from
+  /// \p Range.Start to \p Range.End.
+  bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range);
 
   /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
   /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
@@ -7988,11 +7987,11 @@ LoopVectorizationPlanner::tryToOptimizeI
   return nullptr;
 }
 
-VPWidenRecipe *LoopVectorizationPlanner::tryToWiden(
-    Instruction *I, VPWidenRecipe *LastWidenRecipe, VFRange &Range) {
+bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
+                                          VFRange &Range) {
 
   if (Legal->isScalarWithPredication(I))
-    return nullptr;
+    return false;
 
   auto IsVectorizableOpcode = [](unsigned Opcode) {
     switch (Opcode) {
@@ -8041,13 +8040,13 @@ VPWidenRecipe *LoopVectorizationPlanner:
   };
 
   if (!IsVectorizableOpcode(I->getOpcode()))
-    return nullptr;
+    return false;
 
   if (CallInst *CI = dyn_cast<CallInst>(I)) {
     Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
     if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
                ID == Intrinsic::lifetime_start))
-      return nullptr;
+      return false;
   }
 
   auto willWiden = [&](unsigned VF) -> bool {
@@ -8079,13 +8078,18 @@ VPWidenRecipe *LoopVectorizationPlanner:
   };
 
   if (!getDecisionAndClampRange(willWiden, Range))
-    return nullptr;
+    return false;
 
   // Success: widen this instruction. We optimize the common case where
   // consecutive instructions can be represented by a single recipe.
-  if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
-    return LastWidenRecipe;
-  return new VPWidenRecipe(I);
+  if (!VPBB->empty()) {
+    VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
+    if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
+      return true;
+  }
+
+  VPBB->appendRecipe(new VPWidenRecipe(I));
+  return true;
 }
 
 VPBasicBlock *LoopVectorizationPlanner::handleReplication(
@@ -8182,7 +8186,6 @@ VPlan *LoopVectorizationPlanner::buildVP
     auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
     VPBB->setOneSuccessor(FirstVPBBForBB);
     VPBB = FirstVPBBForBB;
-    VPWidenRecipe *LastWidenRecipe = nullptr;
 
     std::vector<Instruction *> Ingredients;
 
@@ -8250,12 +8253,8 @@ VPlan *LoopVectorizationPlanner::buildVP
       // Check if Instr is to be widened by a general VPWidenRecipe, after
       // having first checked for specific widening recipes that deal with
       // Interleave Groups, Inductions and Phi nodes.
-      if ((Recipe = tryToWiden(Instr, LastWidenRecipe, Range))) {
-        if (Recipe != LastWidenRecipe)
-          VPBB->appendRecipe(Recipe);
-        LastWidenRecipe = cast<VPWidenRecipe>(Recipe);
+      if (tryToWiden(Instr, VPBB, Range))
         continue;
-      }
 
       // Otherwise, if all widening options failed, Instruction is to be
       // replicated. This may create a successor for VPBB.

Modified: llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll?rev=314981&r1=314980&r2=314981&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/first-order-recurrence.ll Thu Oct  5 05:41:49 2017
@@ -491,6 +491,55 @@ for.end:
   ret void
 }
 
+; PR34711: given three consecutive instructions such that the first will be
+; widened, the second is a cast that will be widened and needs to sink after the
+; third, and the third is a first-order-recurring load that will be replicated
+; instead of widened. Although the cast and the first instruction will both be
+; widened, and are originally adjacent to each other, make sure the replicated
+; load ends up appearing between them.
+;
+; void PR34711(short[2] *a, int *b, int *c, int n) {
+;   for(int i = 0; i < n; i++) {
+;     c[i] = 7;
+;     b[i] = (a[i][0] * a[i][1]);
+;   }
+; }
+;
+; SINK-AFTER-LABEL: @PR34711
+; Check that the sext sank after the load in the vector loop.
+; SINK-AFTER: vector.body
+; SINK-AFTER:   %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ {{.*}}, %vector.body ]
+; SINK-AFTER:   %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %{{.*}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER:   %[[VCONV:.+]] = sext <4 x i16> %[[VSHUF]] to <4 x i32>
+; SINK-AFTER:   %[[VCONV3:.+]] = sext <4 x i16> {{.*}} to <4 x i32>
+; SINK-AFTER:   mul nsw <4 x i32> %[[VCONV3]], %[[VCONV]]
+;
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+entry:
+  %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
+  %.pre = load i16, i16* %pre.index
+  br label %for.body
+
+for.body:
+  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
+  %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
+  store i32 7, i32* %arraycidx   ; 1st instruction, to be widened.
+  %conv = sext i16 %0 to i32     ; 2nd, cast to sink after third.
+  %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
+  %conv3 = sext i16 %1 to i32
+  %mul = mul nsw i32 %conv3, %conv
+  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  store i32 %mul, i32* %arrayidx5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 ; void no_sink_after(short *a, int n, int *b) {
 ;   for(int i = 0; i < n; i++)
 ;     b[i] = ((a[i] + 2) * a[i + 1]);
