[llvm] 92bfbbc - [VPlan] Invert condition if needed when creating inner regions. (#132292)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 01:40:48 PDT 2025
Author: Florian Hahn
Date: 2025-04-28T09:40:43+01:00
New Revision: 92bfbbc4e5f4c7c6a7b677b1da9765b2507a98ce
URL: https://github.com/llvm/llvm-project/commit/92bfbbc4e5f4c7c6a7b677b1da9765b2507a98ce
DIFF: https://github.com/llvm/llvm-project/commit/92bfbbc4e5f4c7c6a7b677b1da9765b2507a98ce.diff
LOG: [VPlan] Invert condition if needed when creating inner regions. (#132292)
As pointed out by @iamlouk in
https://github.com/llvm/llvm-project/pull/129402, the current code
doesn't correctly handle latches whose first successor is the loop header
rather than the exit. Introduce a `NOT` of the latch's branch condition (and swap the latch's successors) if needed.
Depends on https://github.com/llvm/llvm-project/pull/129402
PR: https://github.com/llvm/llvm-project/pull/132292
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b374371667b5e..5eb2f058f329f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -386,33 +386,54 @@ std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
/// has exactly 2 predecessors (preheader and latch), where the block
/// dominates the latch and the preheader dominates the block. If it is a
-/// header block return true, making sure the preheader appears first and
-/// the latch second. Otherwise return false.
-static bool canonicalHeader(VPBlockBase *HeaderVPB,
- const VPDominatorTree &VPDT) {
+/// header block return true and canonicalize the predecessors of the header
+/// (making sure the preheader appears first and the latch second) and the
+/// successors of the latch (making sure the loop exit comes first). Otherwise
+/// return false.
+static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
+ const VPDominatorTree &VPDT) {
ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
if (Preds.size() != 2)
return false;
auto *PreheaderVPBB = Preds[0];
auto *LatchVPBB = Preds[1];
- if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
- VPDT.dominates(HeaderVPB, LatchVPBB))
- return true;
+ if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
+ !VPDT.dominates(HeaderVPB, LatchVPBB)) {
+ std::swap(PreheaderVPBB, LatchVPBB);
- std::swap(PreheaderVPBB, LatchVPBB);
+ if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
+ !VPDT.dominates(HeaderVPB, LatchVPBB))
+ return false;
- if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
- VPDT.dominates(HeaderVPB, LatchVPBB)) {
- // Canonicalize predecessors of header so that preheader is first and latch
- // second.
+ // Canonicalize predecessors of header so that preheader is first and
+ // latch second.
HeaderVPB->swapPredecessors();
for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
R.swapOperands();
- return true;
}
- return false;
+ // The two successors of conditional branch match the condition, with the
+ // first successor corresponding to true and the second to false. We
+ // canonicalize the successors of the latch when introducing the region, such
+ // that the latch exits the region when its condition is true; invert the
+ // original condition if the original CFG branches to the header on true.
+ // Note that the exit edge is not yet connected for top-level loops.
+ if (LatchVPBB->getSingleSuccessor() ||
+ LatchVPBB->getSuccessors()[0] != HeaderVPB)
+ return true;
+
+ assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
+ auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
+ assert(cast<VPInstruction>(Term)->getOpcode() ==
+ VPInstruction::BranchOnCond &&
+ "terminator must be a BranchOnCond");
+ auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
+ Not->insertBefore(Term);
+ Term->setOperand(0, Not);
+ LatchVPBB->swapSuccessors();
+
+ return true;
}
/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
@@ -447,7 +468,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
- if (canonicalHeader(HeaderVPB, VPDT))
+ if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
createLoopRegion(Plan, HeaderVPB);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
index 388da8540646f..afd1308a2d24a 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
@@ -4,7 +4,6 @@
@A = common global [1024 x i64] zeroinitializer, align 16
@B = common global [1024 x i64] zeroinitializer, align 16
-; FIXME: The exit condition of the inner loop is incorrect when vectorizing.
define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-LABEL: define void @inner_latch_header_first_successor(
; CHECK-SAME: i64 [[N:%.*]], i32 [[C:%.*]], i64 [[M:%.*]]) {
@@ -35,8 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-NEXT: [[TMP3]] = add nsw <4 x i64> [[TMP2]], [[VEC_PHI4]]
; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]]
; CHECK: [[VECTOR_LATCH]]:
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[VEC_PHI6]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
More information about the llvm-commits
mailing list