[llvm] [VPlan] Fix broadcasted values using loop region during execution (PR #142594)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 05:38:07 PDT 2025
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/142594
I noticed this after updating #118638 on top of #117506, and seeing that some broadcasts were no longer being hoisted into the loop preheader.
It was calling VPlan::getVectorPreheader(), which at this point now is dissolved and returns null.
This fixes it by getting the header from vputils::getFirstLoopHeader and getting the preheader from its first successor.
I've also added an assertion in getVectorPreheader() to make sure its not called after the regions are dissolved.
>From f2a577e28cf169c9fda2e31a2051968c8fd59eac Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 3 Jun 2025 13:32:55 +0100
Subject: [PATCH] [VPlan] Fix broadcasted values using loop region during
execution
I noticed this after updating #118638 on top of #117506, and seeing that some broadcasts were no longer being hoisted into the loop preheader.
It was calling VPlan::getVectorPreheader(), which at this point now is dissolved and returns null.
This fixes it by getting the header from vputils::getFirstLoopHeader and getting the preheader from its first successor.
I've also added an assertion in getVectorPreheader() to make sure its not called after the regions are dissolved.
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 14 +++++++++-----
llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++-----
.../X86/pr141968-instsimplifyfolder.ll | 4 ++--
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 280ea47c5d7cc..b69278bac047e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -290,18 +290,22 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
return Data.VPV2Vector[Def];
auto GetBroadcastInstrs = [this, Def](Value *V) {
+ VPBasicBlock *PreheaderVPBB = nullptr;
+ if (auto *Header = vputils::getFirstLoopHeader(*Plan, VPDT))
+ PreheaderVPBB = cast<VPBasicBlock>(Header->getPredecessors()[0]);
+
bool SafeToHoist =
- !Def->hasDefiningRecipe() ||
- VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
- Plan->getVectorPreheader());
+ PreheaderVPBB &&
+ (!Def->hasDefiningRecipe() ||
+ VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
+ PreheaderVPBB));
if (VF.isScalar())
return V;
// Place the code for broadcasting invariant variables in the new preheader.
IRBuilder<>::InsertPointGuard Guard(Builder);
if (SafeToHoist) {
- BasicBlock *LoopVectorPreHeader =
- CFG.VPBB2IRBB[Plan->getVectorPreheader()];
+ BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[PreheaderVPBB];
if (LoopVectorPreHeader)
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 44f0b6d964a6e..736f600773094 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3984,13 +3984,11 @@ class VPlan {
VPBasicBlock *getEntry() { return Entry; }
const VPBasicBlock *getEntry() const { return Entry; }
- /// Returns the preheader of the vector loop region, if one exists, or null
- /// otherwise.
+ /// Returns the preheader of the vector loop region, provided it exists.
VPBasicBlock *getVectorPreheader() {
VPRegionBlock *VectorRegion = getVectorLoopRegion();
- return VectorRegion
- ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
- : nullptr;
+ assert(VectorRegion && "vector loop region no longer exists?");
+ return cast<VPBasicBlock>(VectorRegion->getSinglePredecessor());
}
/// Returns the VPRegionBlock of the vector loop.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
index 9a70ed451cf42..9d377a95e8252 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll
@@ -14,6 +14,8 @@ define i8 @pr141968(i1 %cond, i8 %v) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[COND]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT31:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT32:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT31]], <16 x i8> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE30:.*]] ]
@@ -97,8 +99,6 @@ define i8 @pr141968(i1 %cond, i8 %v) {
; CHECK: [[PRED_SDIV_IF29]]:
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE30]]
; CHECK: [[PRED_SDIV_CONTINUE30]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT31:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT32:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT31]], <16 x i8> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[BROADCAST_SPLAT]], <16 x i8> zeroinitializer, <16 x i8> [[BROADCAST_SPLAT32]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
More information about the llvm-commits
mailing list