[llvm] [VPlan] Introduces explicit broadcast for live-in constants. (PR #133213)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 23:25:08 PDT 2025
https://github.com/ElvisWang123 created https://github.com/llvm/llvm-project/pull/133213
This patch focuses on representing the broadcast of live-in constants explicitly. This can help the VPlan-based cost model account for the broadcast cost and track the register pressure of the broadcast value in the future.
This patch will not change the generated vector IR and it only changes the output of VPlan.
Note that the `materializeBroadcasts()` pass currently runs after the cost model and the register pressure model, so this patch only affects the output of the `Final` plan.
>From 5d523fbdc5516bc56c72575758952ed8c794b36a Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 26 Mar 2025 18:34:25 -0700
Subject: [PATCH] [VPlan] Introduces explicit broadcast for live-in constants.
This patch focuses on explicitly showing the broadcast of live-in
constants. This can help the VPlan-based cost model account for the
broadcast cost and track the register pressure of the broadcast value
in the future.
Live-in constants usually have only a single user, so
insert the `broadcast` just before that user to reduce the live range of
the broadcast value and avoid changes to the generated vector IR.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 19 ++++++++++++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 ++
.../Transforms/Vectorize/VPlanTransforms.cpp | 26 ++++++++++++++-----
.../RISCV/riscv-vector-reverse.ll | 6 +++--
.../RISCV/vplan-vp-select-intrinsics.ll | 3 ++-
.../LoopVectorize/vplan-predicate-switch.ll | 6 +++--
6 files changed, 51 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a7c85d30ba9f0..026f5b349987d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1127,6 +1127,17 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ switch (Opcode) {
+ default:
+ return false;
+ case Instruction::ExtractValue:
+ return Op == getOperand(1);
+ }
+ }
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
@@ -1365,6 +1376,14 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // Scalar called function cannot be vectorized.
+ return Op == getOperand(getNumOperands() - 1);
+ }
};
/// A recipe representing a sequence of load -> update -> store as part of
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index cdef7972f3bdc..f992496735f0e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -915,6 +915,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
default:
return false;
case Instruction::ExtractElement:
+ case Instruction::ExtractValue:
+ case VPInstruction::ExtractFromEnd:
return Op == getOperand(1);
case Instruction::PHI:
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 67f77b41e878a..9df744a0629c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2207,10 +2207,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
auto *VectorPreheader = Plan.getVectorPreheader();
for (VPValue *VPV : VPValues) {
- if (all_of(VPV->users(),
- [VPV](VPUser *U) { return U->usesScalars(VPV); }) ||
- (VPV->isLiveIn() && VPV->getLiveInIRValue() &&
- isa<Constant>(VPV->getLiveInIRValue())))
+ if (all_of(VPV->users(), [VPV](VPUser *U) { return U->usesScalars(VPV); }))
continue;
// Add explicit broadcast at the insert point that dominates all users.
@@ -2227,8 +2224,25 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
"All users must be in the vector preheader or dominated by it");
}
- VPBuilder Builder(cast<VPBasicBlock>(HoistBlock), HoistPoint);
- auto *Broadcast = Builder.createNaryOp(VPInstruction::Broadcast, {VPV});
+ VPInstruction *Broadcast;
+ if (VPV->isLiveIn() && isa_and_nonnull<Constant>(VPV->getLiveInIRValue())) {
+ // We cannot replace the constant live-ins for PHIs by broadcast in the
+ // same VPBB because it will break PHI. Also cannot replace the
+ // VPWidenGEPRecipe since it broadcasts the generated pointer instead of
+ // operands.
+ if (auto *R = dyn_cast_if_present<VPRecipeBase>(*(VPV->users().begin()));
+ R && !isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPWidenGEPRecipe>(R) &&
+ !VPV->hasMoreThanOneUniqueUser()) {
+ Broadcast = new VPInstruction(VPInstruction::Broadcast, {VPV});
+ // Insert just before the user to reduce register pressure.
+ Broadcast->insertBefore(R);
+ } else {
+ continue;
+ }
+ } else {
+ VPBuilder Builder(cast<VPBasicBlock>(HoistBlock), HoistPoint);
+ Broadcast = Builder.createNaryOp(VPInstruction::Broadcast, {VPV});
+ }
VPV->replaceUsesWithIf(Broadcast,
[VPV, Broadcast](VPUser &U, unsigned Idx) {
return Broadcast != &U && !U.usesScalars(VPV);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 95df397ecdf41..8d79d18bc7f57 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -201,7 +201,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: WIDEN ir<%add9> = add ir<[[L]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[BROADCAST:%.+]]> = broadcast ir<1>
+; CHECK-NEXT: WIDEN ir<%add9> = add ir<[[L]]>, vp<[[BROADCAST]]>
; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
@@ -450,7 +451,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<[[L]]>, ir<1.000000e+00>
+; CHECK-NEXT: EMIT vp<[[BROADCAST:%.+]]> = broadcast ir<1.000000e+00>
+; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<[[L]]>, vp<[[BROADCAST]]>
; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx3>, ir<[[VF]]>
; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index dfc2fffdad2bb..cddf2f0a00cfa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -44,7 +44,8 @@
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
- ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub ir<0>, ir<[[LD2]]>
+ ; IF-EVL-NEXT: EMIT vp<[[BROADCAST:%.+]]> = broadcast ir<0>
+ ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = sub vp<[[BROADCAST]]>, ir<[[LD2]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]>)
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add vp<[[SELECT]]>, ir<[[LD1]]>
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 8374ac88c8bad..299ba9c8a0e95 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -23,8 +23,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[WIDE_PTR:%.+]]> = vector-pointer vp<[[PTR]]>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[WIDE_PTR]]>
-; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
-; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
+; CHECK-NEXT: EMIT vp<[[BROADCAST1:%.+]]> = broadcast ir<-12>
+; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, vp<[[BROADCAST1]]>
+; CHECK-NEXT: EMIT vp<[[BROADCAST2:%.+]]> = broadcast ir<13>
+; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, vp<[[BROADCAST2]]>
; CHECK-NEXT: EMIT vp<[[OR_CASES:%.+]]> = or vp<[[C1]]>, vp<[[C2]]>
; CHECK-NEXT: EMIT vp<[[DEFAULT_MASK:%.+]]> = not vp<[[OR_CASES]]>
; CHECK-NEXT: Successor(s): pred.store
More information about the llvm-commits
mailing list