[llvm] [VPlan] Invert condition if needed when creating inner regions. (PR #132292)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 26 13:29:48 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/132292
>From 29bfcd064cf5d26d03d84857c678173386e565d4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 20 Mar 2025 21:41:54 +0000
Subject: [PATCH 1/2] [VPlan] Invert condition if needed when creating inner
regions.
---
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 11 +++++++++++
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +++
.../outer-loop-inner-latch-successors.ll | 6 +++---
3 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f24d42256caef..b803081804aac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -420,6 +420,17 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
+ // We are canonicalizing the successors of the latch when introducing the
+ // region. We will exit the region of the latch condition is true; invert the
+ // original condition if the original CFG branches to the header on true.
+ if (!LatchVPBB->getSingleSuccessor() &&
+ LatchVPBB->getSuccessors()[0] == HeaderVPB) {
+ auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
+ auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
+ Not->insertBefore(Term);
+ Term->setOperand(0, Not);
+ }
+
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3d82742f0adab..ac12b0923b5ab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
VPValue *VPV = Ingredient.getVPSingleValue();
+ if (!VPV->getUnderlyingValue())
+ continue;
+
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
VPRecipeBase *NewRecipe = nullptr;
diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
index 388da8540646f..afd1308a2d24a 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
@@ -4,7 +4,6 @@
@A = common global [1024 x i64] zeroinitializer, align 16
@B = common global [1024 x i64] zeroinitializer, align 16
-; FIXME: The exit condition of the inner loop is incorrect when vectorizing.
define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-LABEL: define void @inner_latch_header_first_successor(
; CHECK-SAME: i64 [[N:%.*]], i32 [[C:%.*]], i64 [[M:%.*]]) {
@@ -35,8 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-NEXT: [[TMP3]] = add nsw <4 x i64> [[TMP2]], [[VEC_PHI4]]
; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]]
; CHECK: [[VECTOR_LATCH]]:
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[VEC_PHI6]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
>From fa812b5c298cdfd75ff30eaa07623f8e7e6e5230 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 26 Apr 2025 21:28:08 +0100
Subject: [PATCH 2/2] !fixup address latest comments, thanks
---
.../Vectorize/VPlanConstruction.cpp | 65 +++++++++++--------
1 file changed, 37 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b803081804aac..9ae6d333c5310 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -386,51 +386,60 @@ std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
/// has exactly 2 predecessors (preheader and latch), where the block
/// dominates the latch and the preheader dominates the block. If it is a
-/// header block return true, making sure the preheader appears first and
-/// the latch second. Otherwise return false.
-static bool canonicalHeader(VPBlockBase *HeaderVPB,
- const VPDominatorTree &VPDT) {
+/// header block return true and canonicalize the predecessors of the header
+/// (making sure the preheader appears first and the latch second) and the
+/// successors of the latch (making sure the loop exit comes first). Otherwise
+/// return false.
+static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
+ const VPDominatorTree &VPDT) {
ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
if (Preds.size() != 2)
return false;
auto *PreheaderVPBB = Preds[0];
auto *LatchVPBB = Preds[1];
- if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
- VPDT.dominates(HeaderVPB, LatchVPBB))
- return true;
-
- std::swap(PreheaderVPBB, LatchVPBB);
+ if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
+ !VPDT.dominates(HeaderVPB, LatchVPBB)) {
+ std::swap(PreheaderVPBB, LatchVPBB);
- if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
- VPDT.dominates(HeaderVPB, LatchVPBB)) {
- // Canonicalize predecessors of header so that preheader is first and latch
- // second.
- HeaderVPB->swapPredecessors();
- for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
- R.swapOperands();
- return true;
+ if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
+ !VPDT.dominates(HeaderVPB, LatchVPBB)) {
+ return false;
+ } else {
+ // Canonicalize predecessors of header so that preheader is first and
+ // latch second.
+ HeaderVPB->swapPredecessors();
+ for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
+ R.swapOperands();
+ }
}
- return false;
-}
-
-/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
-static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
- auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
- auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
-
- // We are canonicalizing the successors of the latch when introducing the
- // region. We will exit the region of the latch condition is true; invert the
+ // The two successors of conditional branch match the condition, with the
+ // first successor corresponding to true and the second to false. We
+ // canonicalize the successors of the latch when introducing the region, such
+ // that the latch exits the region when its condition is true; invert the
// original condition if the original CFG branches to the header on true.
if (!LatchVPBB->getSingleSuccessor() &&
LatchVPBB->getSuccessors()[0] == HeaderVPB) {
+ assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
+ assert(cast<VPInstruction>(Term)->getOpcode() ==
+ VPInstruction::BranchOnCond &&
+ "terminator must be a BranchOnCond");
auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
Not->insertBefore(Term);
Term->setOperand(0, Not);
+ LatchVPBB->swapSuccessors();
}
+ return true;
+}
+
+/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
+static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
+ auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
+ auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
+
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
@@ -458,7 +467,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
- if (canonicalHeader(HeaderVPB, VPDT))
+ if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
createLoopRegion(Plan, HeaderVPB);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
More information about the llvm-commits
mailing list