[llvm] c482b8f - [VPlan] Only execute VPExpandSCEVRecipes once and remove them (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 23 02:06:28 PDT 2025
Author: Florian Hahn
Date: 2025-03-23T09:06:01Z
New Revision: c482b8faeace855332a8c070cffaf3d0732cc79e
URL: https://github.com/llvm/llvm-project/commit/c482b8faeace855332a8c070cffaf3d0732cc79e
DIFF: https://github.com/llvm/llvm-project/commit/c482b8faeace855332a8c070cffaf3d0732cc79e.diff
LOG: [VPlan] Only execute VPExpandSCEVRecipes once and remove them (NFC).
Instead of executing the whole entry VPIRBB twice, first execute only
the VPExpandSCEVRecipes and replace their uses with the expanded
VPValue, which will be a live-in. This allows removing the special logic
in VPExpandSCEVRecipe that supported executing it twice, and allows
moving the ExpandedSCEVs map out of VPTransformState.
It will also allow adding other recipes to the entry VPBB in the future.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanHelpers.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
llvm/test/Transforms/LoopVectorize/pr45259.ll
llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 823af82191c5a..8484288456634 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7695,8 +7695,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 0. Generate SCEV-dependent code in the entry, including TripCount, before
// making any changes to the CFG.
- if (!BestVPlan.getEntry()->empty())
- BestVPlan.getEntry()->execute(&State);
+ DenseMap<const SCEV *, Value *> ExpandedSCEVs;
+ auto *Entry = cast<VPIRBasicBlock>(BestVPlan.getEntry());
+ State.Builder.SetInsertPoint(Entry->getIRBasicBlock()->getTerminator());
+ for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
+ auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
+ if (!ExpSCEV)
+ continue;
+ ExpSCEV->execute(State);
+ ExpandedSCEVs[ExpSCEV->getSCEV()] = State.get(ExpSCEV, VPLane(0));
+ VPValue *Exp = BestVPlan.getOrAddLiveIn(ExpandedSCEVs[ExpSCEV->getSCEV()]);
+ ExpSCEV->replaceAllUsesWith(Exp);
+ if (BestVPlan.getTripCount() == ExpSCEV)
+ BestVPlan.resetTripCount(Exp);
+ ExpSCEV->eraseFromParent();
+ }
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7706,9 +7719,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 1. Set up the skeleton for vectorization, including vector pre-header and
// middle block. The vector loop is created during VPlan execution.
- VPBasicBlock *VectorPH =
- cast<VPBasicBlock>(BestVPlan.getEntry()->getSingleSuccessor());
-
+ VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
if (VectorizingEpilogue)
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -7821,7 +7832,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
}
}
- return State.ExpandedSCEVs;
+ return ExpandedSCEVs;
}
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index d53b72bb2258a..8e5b974d887f4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -355,10 +355,6 @@ struct VPTransformState {
/// memchecks. The actually versioning is performed manually.
LoopVersioning *LVer = nullptr;
- /// Map SCEVs to their expanded values. Populated when executing
- /// VPExpandSCEVRecipes.
- DenseMap<const SCEV *, Value *> ExpandedSCEVs;
-
/// VPlan-based type analysis.
VPTypeAnalysis TypeAnalysis;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2af29c8c72471..c7190b3187d94 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3444,23 +3444,10 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "cannot be used in per-lane");
- if (State.ExpandedSCEVs.contains(Expr)) {
- // SCEV Expr has already been expanded, result must already be set. At the
- // moment we have to execute the entry block twice (once before skeleton
- // creation to get expanded SCEVs used by the skeleton and once during
- // regular VPlan execution).
- State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
- assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
- "Results must match");
- return;
- }
-
const DataLayout &DL = SE.getDataLayout();
SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true);
-
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
- State.ExpandedSCEVs[Expr] = Res;
State.set(this, Res, VPLane(0));
}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index c21d229ec4f12..95df397ecdf41 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -152,11 +152,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
-; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
+; CHECK-NEXT: ir<%0> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body.preheader>:
; CHECK-NEXT: IR %0 = zext i32 %n to i64
-; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.scevcheck>:
@@ -213,7 +212,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
+; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
; CHECK-EMPTY:
@@ -402,11 +401,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
-; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
+; CHECK-NEXT: ir<%0> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body.preheader>:
; CHECK-NEXT: IR %0 = zext i32 %n to i64
-; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.scevcheck>:
@@ -463,7 +461,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
+; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll
index 6baed089fb6b6..c7f2d7ac0bcf9 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45259.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll
@@ -16,12 +16,12 @@ define i8 @widget(ptr %arr, i8 %t9) {
; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
; CHECK-NEXT: [[T1_0_LCSSA4:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
; CHECK-NEXT: [[T1_0_LCSSA1:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
+; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
+; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARR1]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[TMP0]]
-; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 56ba4ccbe99e0..8374ac88c8bad 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -5,10 +5,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK: VPlan 'Final VPlan for VF={2},UF={1}' {
; CHECK-NEXT: Live-in ir<[[VFxUF:.+]]> = VF * UF
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
-; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
+; CHECK-NEXT: ir<%0> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
-; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.ph>:
@@ -86,7 +85,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
+; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index db3e31c1a15ae..eeabb9b6a85b0 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -62,11 +62,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK: Executing best plan with VF=8, UF=2
; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' {
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
-; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
+; CHECK-NEXT: ir<%and> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: IR %and = and i64 %N, 15
-; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<vector.ph>:
@@ -92,7 +91,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
+; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<%and>, ir<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
; CHECK-EMPTY:
More information about the llvm-commits mailing list