[llvm] f9369cc - [VPlan] Make sure last IV increment value is available if needed.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 6 14:41:58 PST 2025
Author: Florian Hahn
Date: 2025-01-06T22:40:41Z
New Revision: f9369cc602272796c15de1065a782f812e791df3
URL: https://github.com/llvm/llvm-project/commit/f9369cc602272796c15de1065a782f812e791df3
DIFF: https://github.com/llvm/llvm-project/commit/f9369cc602272796c15de1065a782f812e791df3.diff
LOG: [VPlan] Make sure last IV increment value is available if needed.
Legalize extract-from-ends using uniform VPReplicateRecipe of wide
inductions to use regular VPReplicateRecipe, so the correct end value
is available.
Fixes https://github.com/llvm/llvm-project/issues/121745.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 395287bde76f37..3e3f5adf73a0f0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step);
}
+static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
+ SetVector<VPUser *> Users(V->user_begin(), V->user_end());
+ for (unsigned I = 0; I != Users.size(); ++I) {
+ VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
+ if (isa<VPHeaderPHIRecipe>(Cur))
+ continue;
+ for (VPValue *V : Cur->definedValues())
+ Users.insert(V->user_begin(), V->user_end());
+ }
+ return Users.takeVector();
+}
+
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
/// require vectors while other require scalars, the scalar uses need to extract
/// the scalars from the generated vectors (Note that this is different to how
-/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
-/// if any of its users needs scalar values, by providing them scalar steps
-/// built on the canonical scalar IV and update the original IV's users. This is
-/// an optional optimization to reduce the needs of vector extracts.
+/// int/fp inductions are handled). Legalize extract-from-ends using uniform
+/// VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
+/// the correct end value is available. Also optimize
+/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
+/// providing them scalar steps built on the canonical scalar IV and update the
+/// original IV's users. This is an optional optimization to reduce the needs of
+/// vector extracts.
static void legalizeAndOptimizeInductions(VPlan &Plan) {
+ using namespace llvm::VPlanPatternMatch;
SmallVector<VPRecipeBase *> ToRemove;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+ auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&Phi);
+ if (!PhiR)
+ break;
+
+ // Check if any uniform VPReplicateRecipes using the phi recipe are used by
+ // ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
+ // ensure the final value is available.
+ // TODO: Remove once uniformity analysis is done on VPlan.
+ for (VPUser *U : collectUsersRecursively(PhiR)) {
+ auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
+ VPValue *Op;
+ if (!ExitIRI || !match(ExitIRI->getOperand(0),
+ m_VPInstruction<VPInstruction::ExtractFromEnd>(
+ m_VPValue(Op), m_VPValue())))
+ continue;
+ auto *RepR = dyn_cast<VPReplicateRecipe>(Op);
+ if (!RepR || !RepR->isUniform())
+ continue;
+ assert(!RepR->isPredicated() && "RepR must not be predicated");
+ Instruction *I = RepR->getUnderlyingInstr();
+ auto *Clone =
+ new VPReplicateRecipe(I, RepR->operands(), /*IsUniform*/ false);
+ Clone->insertAfter(RepR);
+ RepR->replaceAllUsesWith(Clone);
+ }
+
// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
return true;
}
-static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
- SetVector<VPUser *> Users(V->user_begin(), V->user_end());
- for (unsigned I = 0; I != Users.size(); ++I) {
- VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
- if (isa<VPHeaderPHIRecipe>(Cur))
- continue;
- for (VPValue *V : Cur->definedValues())
- Users.insert(V->user_begin(), V->user_end());
- }
- return Users.takeVector();
-}
-
void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
for (VPRecipeBase &R :
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index f03ca2d6e23c6a..482f731afd8136 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -981,7 +981,6 @@ exit:
}
; Test case for https://github.com/llvm/llvm-project/issues/121745.
-; FIXME: At the moment an incorrect exit value is used for %iv.next.
define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
; VEC-SAME: ptr [[DST:%.*]]) {
@@ -994,10 +993,12 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
; VEC-NEXT: [[TMP4:%.*]] = add i32 [[STEP_2]], [[TMP0]]
+; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1014,7 +1015,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
; VEC: [[E_EXIT]]:
-; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; VEC-NEXT: ret i32 [[RES]]
;
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
@@ -1071,7 +1072,6 @@ e.exit:
ret i32 %res
}
-; FIXME: At the moment an incorrect exit value is used for %iv.next.
define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
; VEC-SAME: ptr [[DST:%.*]]) {
More information about the llvm-commits mailing list