[llvm] e7f1232 - [LV] Move optimized IV recipes to phi section of header after sinking.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 10 03:41:27 PST 2021


Author: Florian Hahn
Date: 2021-11-10T11:41:08Z
New Revision: e7f1232cb777ac7408c7b4b7f5c421986bad3a82

URL: https://github.com/llvm/llvm-project/commit/e7f1232cb777ac7408c7b4b7f5c421986bad3a82
DIFF: https://github.com/llvm/llvm-project/commit/e7f1232cb777ac7408c7b4b7f5c421986bad3a82.diff

LOG: [LV] Move optimized IV recipes to phi section of header after sinking.

Unfortunately, sinking recipes for first-order recurrences relies on
the original position of recipes. So if a recipe needs to be sunk after
an optimized induction, the induction recipe needs to stay in its original
position until sinking is done. This is causing PR52460.

To fix the crash, keep the recipes in the original position until
sink-after is done.

Post-commit follow-up to c45045bfd04af9 to address PR52460.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/induction.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1f11301974573..bd9675f1dda59 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9332,6 +9332,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   DFS.perform(LI);
 
   VPBasicBlock *VPBB = nullptr;
+  VPBasicBlock *HeaderVPBB = nullptr;
+  SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
@@ -9339,8 +9341,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
     if (VPBB)
       VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
-    else
+    else {
       Plan->setEntry(FirstVPBBForBB);
+      HeaderVPBB = FirstVPBBForBB;
+    }
     VPBB = FirstVPBBForBB;
     Builder.setInsertPoint(VPBB);
 
@@ -9382,15 +9386,19 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
           Plan->addVPValue(UV, Def);
         }
 
+        if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
+            HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
+          // Keep track of VPWidenIntOrFpInductionRecipes not in the phi section
+          // of the header block. That can happen for truncates of induction
+          // variables. Those recipes are moved to the phi section of the header
+          // block after applying SinkAfter, which relies on the original
+          // position of the trunc.
+          assert(isa<TruncInst>(Instr));
+          InductionsToMove.push_back(
+              cast<VPWidenIntOrFpInductionRecipe>(Recipe));
+        }
         RecipeBuilder.setRecipe(Instr, Recipe);
-        if (isa<VPWidenIntOrFpInductionRecipe>(Recipe)) {
-          // Make sure induction recipes are all kept in the header block.
-          // VPWidenIntOrFpInductionRecipe may be generated when reaching a
-          // Trunc of an induction Phi, where Trunc may not be in the header.
-          auto *Header = Plan->getEntry()->getEntryBasicBlock();
-          Header->insert(Recipe, Header->getFirstNonPhi());
-        } else
-          VPBB->appendRecipe(Recipe);
+        VPBB->appendRecipe(Recipe);
         continue;
       }
 
@@ -9479,6 +9487,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     }
   }
 
+  // Now that sink-after is done, move induction recipes for optimized truncates
+  // to the phi section of the header block.
+  for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
+    Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+
   // Adjust the recipes for any inloop reductions.
   adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index b14ac068c759b..1a4167f5c3b9b 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -896,3 +896,40 @@ loop:                                         ; preds = %loop, %entry
   br i1 %exitcond.i, label %exit, label %loop
 
 }
+
+; Test case for PR52460.
+define void @pr52460_first_order_recurrence_truncated_iv(i32* noalias %src, i32* %dst) {
+; CHECK-LABEL: vector.body:
+; CHECK-NEXT:  [[MAIN_IV:%.+]] = phi i64 [ 0, %vector.ph ], [ [[MAIN_IV_NEXT:%.+]], %vector.body ]
+; CHECK-NEXT:  [[VEC_IV_64:%.+]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IV_64_NEXT:%.+]], %vector.body ]
+; CHECK-NEXT:  [[VEC_RECUR:%.+]] = phi <2 x i32> [ <i32 poison, i32 0>, %vector.ph ], [ [[VEC_IV_32:%.+]], %vector.body ]
+; CHECK-NEXT:  [[VEC_IV_32]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ [[VEC_IV_32_NEXT:%.+]], %vector.body ]
+; CHECK:       [[RECUR_SHUFFLE:%.+]] = shufflevector <2 x i32> [[VEC_RECUR]], <2 x i32> [[VEC_IV_32]], <2 x i32> <i32 1, i32 2>
+; CHECK:       mul nsw <2 x i32> {{.+}}, [[RECUR_SHUFFLE]]
+; CHECK:       [[ADD:%.+]] = add <2 x i32> [[VEC_IV_32]]
+; CHECK:       store <2 x i32> [[ADD]], <2 x i32>*
+; CHECK-NEXT:  [[MAIN_IV_NEXT]] = add nuw i64 [[MAIN_IV]], 2
+; CHECK-NEXT:  [[VEC_IV_64_NEXT]] = add <2 x i64> [[VEC_IV_64]], <i64 2, i64 2>
+; CHECK-NEXT:  [[VEC_IV_32_NEXT]] = add <2 x i32> [[VEC_IV_32]], <i32 2, i32 2>
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %trunc.iv = phi i32 [ 0, %entry ], [ %trunc.iv.next, %loop ]
+  %recur = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
+  %lv = load i32, i32* %src, align 4
+  %mul = mul nsw i32 %lv, %recur
+  %trunc.iv.next  = add i32 %trunc.iv, 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %iv.trunc = trunc i64 %iv to i32
+  %dst.gep = getelementptr i32, i32* %dst, i32 %iv.trunc
+  %add = add i32 %iv.trunc, %mul
+  store i32 %add, i32* %dst.gep
+  %exitcond = icmp eq i32 %trunc.iv.next, 100
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list