[llvm] f9369cc - [VPlan] Make sure last IV increment value is available if needed.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 6 14:41:58 PST 2025


Author: Florian Hahn
Date: 2025-01-06T22:40:41Z
New Revision: f9369cc602272796c15de1065a782f812e791df3

URL: https://github.com/llvm/llvm-project/commit/f9369cc602272796c15de1065a782f812e791df3
DIFF: https://github.com/llvm/llvm-project/commit/f9369cc602272796c15de1065a782f812e791df3.diff

LOG: [VPlan] Make sure last IV increment value is available if needed.

Legalize extract-from-ends using uniform VPReplicateRecipe of wide
inductions to use regular VPReplicateRecipe, so the correct end value
is available.

Fixes https://github.com/llvm/llvm-project/issues/121745.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/test/Transforms/LoopVectorize/iv_outside_user.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 395287bde76f37..3e3f5adf73a0f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
   return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step);
 }
 
+static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
+  SetVector<VPUser *> Users(V->user_begin(), V->user_end());
+  for (unsigned I = 0; I != Users.size(); ++I) {
+    VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
+    if (isa<VPHeaderPHIRecipe>(Cur))
+      continue;
+    for (VPValue *V : Cur->definedValues())
+      Users.insert(V->user_begin(), V->user_end());
+  }
+  return Users.takeVector();
+}
+
 /// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
 /// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
 /// VPWidenPointerInductionRecipe will generate vectors only. If some users
 /// require vectors while other require scalars, the scalar uses need to extract
 /// the scalars from the generated vectors (Note that this is different to how
-/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
-/// if any of its users needs scalar values, by providing them scalar steps
-/// built on the canonical scalar IV and update the original IV's users. This is
-/// an optional optimization to reduce the needs of vector extracts.
+/// int/fp inductions are handled). Legalize extract-from-ends using uniform
+/// VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
+/// the correct end value is available. Also optimize
+/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
+/// providing them scalar steps built on the canonical scalar IV and update the
+/// original IV's users. This is an optional optimization to reduce the needs of
+/// vector extracts.
 static void legalizeAndOptimizeInductions(VPlan &Plan) {
+  using namespace llvm::VPlanPatternMatch;
   SmallVector<VPRecipeBase *> ToRemove;
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
   VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&Phi);
+    if (!PhiR)
+      break;
+
+    // Check if any uniform VPReplicateRecipes using the phi recipe are used by
+    // ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
+    // ensure the final value is available.
+    // TODO: Remove once uniformity analysis is done on VPlan.
+    for (VPUser *U : collectUsersRecursively(PhiR)) {
+      auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
+      VPValue *Op;
+      if (!ExitIRI || !match(ExitIRI->getOperand(0),
+                             m_VPInstruction<VPInstruction::ExtractFromEnd>(
+                                 m_VPValue(Op), m_VPValue())))
+        continue;
+      auto *RepR = dyn_cast<VPReplicateRecipe>(Op);
+      if (!RepR || !RepR->isUniform())
+        continue;
+      assert(!RepR->isPredicated() && "RepR must not be predicated");
+      Instruction *I = RepR->getUnderlyingInstr();
+      auto *Clone =
+          new VPReplicateRecipe(I, RepR->operands(), /*IsUniform*/ false);
+      Clone->insertAfter(RepR);
+      RepR->replaceAllUsesWith(Clone);
+    }
+
     // Replace wide pointer inductions which have only their scalars used by
     // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
     if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
   return true;
 }
 
-static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
-  SetVector<VPUser *> Users(V->user_begin(), V->user_end());
-  for (unsigned I = 0; I != Users.size(); ++I) {
-    VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
-    if (isa<VPHeaderPHIRecipe>(Cur))
-      continue;
-    for (VPValue *V : Cur->definedValues())
-      Users.insert(V->user_begin(), V->user_end());
-  }
-  return Users.takeVector();
-}
-
 void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
   for (VPRecipeBase &R :
        Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {

diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index f03ca2d6e23c6a..482f731afd8136 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -981,7 +981,6 @@ exit:
 }
 
 ; Test case for https://github.com/llvm/llvm-project/issues/121745.
-; FIXME: At the moment an incorrect exit value is used for %iv.next.
 define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
 ; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
 ; VEC-SAME: ptr [[DST:%.*]]) {
@@ -994,10 +993,12 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
 ; VEC:       [[VECTOR_BODY]]:
 ; VEC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; VEC-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; VEC-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
 ; VEC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
 ; VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
 ; VEC-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
 ; VEC-NEXT:    [[TMP4:%.*]] = add i32 [[STEP_2]], [[TMP0]]
+; VEC-NEXT:    [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
 ; VEC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; VEC-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
 ; VEC-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1014,7 +1015,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
 ; VEC-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
 ; VEC-NEXT:    br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
 ; VEC:       [[E_EXIT]]:
-; VEC-NEXT:    [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT:    [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
 ; VEC-NEXT:    ret i32 [[RES]]
 ;
 ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
@@ -1071,7 +1072,6 @@ e.exit:
   ret i32 %res
 }
 
-; FIXME: At the moment an incorrect exit value is used for %iv.next.
 define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
 ; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
 ; VEC-SAME: ptr [[DST:%.*]]) {


        


More information about the llvm-commits mailing list