[llvm] [VPlan] Rewrite sinkScalarOperands (PR #151696)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 03:00:07 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/151696
>From 402ba0faf1e87aa58ee939b4f960b7e00c9e5142 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 1 Aug 2025 13:50:05 +0100
Subject: [PATCH] [VPlan] Rewrite sinkScalarOperands
Rewrite sinkScalarOperands in VPlanTransforms for clarity, with minimal
test changes: the affected tests only see recipes/instructions reordered
within the predicated store blocks.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 73 ++++++++++---------
...-order-recurrence-sink-replicate-region.ll | 4 +-
...eref-pred-poison-ub-ops-feeding-pointer.ll | 12 +--
3 files changed, 46 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 503140213c116..500270398ac96 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -131,60 +131,63 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
}
static bool sinkScalarOperands(VPlan &Plan) {
- auto Iter = vp_depth_first_deep(Plan.getEntry());
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
bool Changed = false;
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
- for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
+ for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
continue;
- VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
+ VPBasicBlock *VPBB =
+ dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
continue;
for (auto &Recipe : *VPBB) {
- for (VPValue *Op : Recipe.operands())
- if (auto *Def =
- dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({VPBB, Def});
+ for (VPValue *Op : Recipe.operands()) {
+ auto *Def =
+ dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe());
+ if (!Def)
+ continue;
+
+ // We only know how to duplicate VPReplicateRecipes and
+ // VPScalarIVStepsRecipes for now.
+ if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Def))
+ continue;
+
+ if (Def->getParent() == VPBB || Def->mayHaveSideEffects() ||
+ Def->mayReadOrWriteMemory())
+ continue;
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(Op))
+ if (!ScalarVFOnly && RepR->isSingleScalar())
+ continue;
+
+ WorkList.insert({VPBB, Def});
+ }
}
}
- bool ScalarVFOnly = Plan.hasScalarVFOnly();
// Try to sink each replicate or scalar IV steps recipe in the worklist.
- for (unsigned I = 0; I != WorkList.size(); ++I) {
+ for (const auto &Item : WorkList) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
- std::tie(SinkTo, SinkCandidate) = WorkList[I];
- if (SinkCandidate->getParent() == SinkTo ||
- SinkCandidate->mayHaveSideEffects() ||
- SinkCandidate->mayReadOrWriteMemory())
- continue;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
- if (!ScalarVFOnly && RepR->isSingleScalar())
- continue;
- } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
- continue;
+ std::tie(SinkTo, SinkCandidate) = Item;
- bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
- // or all users outside of SinkTo must be uniform-after-vectorization (
- // i.e., only first lane is used) . In the latter case, we need to duplicate
- // SinkCandidate.
- auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
- SinkCandidate](VPUser *U) {
- auto *UI = cast<VPRecipeBase>(U);
- if (UI->getParent() == SinkTo)
- return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
- // We only know how to duplicate VPReplicateRecipes and
- // VPScalarIVStepsRecipes for now.
- return NeedsDuplicating &&
- isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
- };
- if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+ // or all users outside of SinkTo must have only their first lane used. In
+ // the latter case, we need to duplicate SinkCandidate.
+ auto UsersOutsideSinkTo =
+ make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) {
+ return cast<VPRecipeBase>(U)->getParent() != SinkTo;
+ });
+ if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
+ return !U->onlyFirstLaneUsed(SinkCandidate);
+ }))
continue;
+ bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (ScalarVFOnly)
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 9deab9063d710..f9648c50e3876 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -133,8 +133,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
@@ -292,8 +292,8 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK: pred.store.if:
-; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 70e730f0284c0..19e87c12dbee3 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -44,10 +44,10 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -55,10 +55,10 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -120,10 +120,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -131,10 +131,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -266,10 +266,10 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -277,10 +277,10 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
More information about the llvm-commits mailing list