[llvm] 24b5f8e - [VPlan] Make sure optimizeInductions removes wide ind from scalar plan.

Thu Jun 30 01:15:25 PDT 2022

Author: Florian Hahn
Date: 2022-06-30T09:11:48+01:00
New Revision: 24b5f8e0d0bd6851e01dbe2de4c71df4b3b0760b

URL: https://github.com/llvm/llvm-project/commit/24b5f8e0d0bd6851e01dbe2de4c71df4b3b0760b
DIFF: https://github.com/llvm/llvm-project/commit/24b5f8e0d0bd6851e01dbe2de4c71df4b3b0760b.diff

LOG: [VPlan] Make sure optimizeInductions removes wide ind from scalar plan.

In some cases, there may be widened users of inductions even though the
plan includes the scalar VF. In those cases, make sure we still replace
the VPWidenIntOrFpInductionRecipe with scalar steps, as otherwise we may
try to execute a VPWidenIntOrFpInductionRecipe with a scalar VF.

Alternatively, the patch could also split the range if needed.

This fixes a crash exposed by D123720.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D128755

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ef0de6d6d9e0f..cca484e13bf14 100644

--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -381,10 +381,13 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
   SmallVector<VPRecipeBase *> ToRemove;
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
     auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
-    if (!IV ||
-        all_of(IV->users(), [IV](VPUser *U) { return !U->usesScalars(IV); }))
+    if (!IV)
+      continue;
+    if (HasOnlyVectorVFs &&
+        none_of(IV->users(), [IV](VPUser *U) { return U->usesScalars(IV); }))
       continue;
 
     const InductionDescriptor &ID = IV->getInductionDescriptor();
@@ -400,7 +403,7 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
     // the list of users doesn't contain duplicates.
     SetVector<VPUser *> Users(IV->user_begin(), IV->user_end());
     for (VPUser *U : Users) {
-      if (!U->usesScalars(IV))
+      if (HasOnlyVectorVFs && !U->usesScalars(IV))
         continue;
       for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) {
         if (U->getOperand(I) != IV)

diff  --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 0e9b686affef1..da07eadd09b22 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -168,3 +168,62 @@ loop.latch:
 exit:
   ret void
 }
+
+; Make sure the widened induction gets replaced by scalar-steps for plans
+; including the scalar VF, if it is used in first-order recurrences.
+
+; DBG-LABEL: 'first_order_recurrence_using_induction'
+; DBG:      VPlan 'Initial VPlan for VF={1},UF>=1' {
+; DBG-NEXT: Live-in vp<%1> = vector-trip-count
+; DBG-EMPTY:
+; DBG-NEXT: vector.ph:
+; DBG-NEXT: Successor(s): vector loop
+; DBG-EMPTY:
+; DBG-NEXT: <x1> vector loop: {
+; DBG-NEXT:   vector.body:
+; DBG-NEXT:     EMIT vp<%2> = CANONICAL-INDUCTION
+; DBG-NEXT:     FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<%4>
+; DBG-NEXT:     vp<%4>    = SCALAR-STEPS vp<%2>, ir<0>, ir<1>
+; DBG-NEXT:     EMIT vp<%5> = first-order splice ir<%for> vp<%4>
+; DBG-NEXT:     CLONE store vp<%5>, ir<%dst>
+; DBG-NEXT:     EMIT vp<%7> = VF * UF +(nuw)  vp<%2>
+; DBG-NEXT:     EMIT branch-on-count  vp<%7> vp<%1>
+; DBG-NEXT:   No successors
+; DBG-NEXT: }
+; DBG-NEXT: Successor(s): middle.block
+; DBG-EMPTY:
+; DBG-NEXT: middle.block:
+; DBG-NEXT: No successors
+; DBG-NEXT: }
+
+define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) {
+; CHECK-LABEL: @first_order_recurrence_using_induction(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDUCTION1:%.*]], %vector.body ]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add i32 [[TMP3]], 0
+; CHECK-NEXT:    [[INDUCTION1]] = add i32 [[TMP3]], 1
+; CHECK-NEXT:    store i32 [[VECTOR_RECUR]], ptr [[DST:%.*]], align 4
+; CHECK-NEXT:    store i32 [[INDUCTION]], ptr [[DST]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec
+; CHECK-NEXT:    br i1 [[TMP4]], label %middle.block, label %vector.body
+; CHECK:       middle.block:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ],[ %iv.next, %loop ]
+  %for = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
+  %iv.trunc = trunc i64 %iv to i32
+  store i32 %for, ptr %dst
+  %iv.next = add nuw nsw i64 %iv, 1
+  %iv.next.trunc = trunc i64 %iv.next to i32
+  %ec = icmp slt i32 %iv.next.trunc, %n
+  br i1 %ec, label %loop, label %exit
+
+exit:
+  ret void
+}