[llvm] [LoopVectorize] Add initial support to VPRegionBlock for multiple successors (PR #108563)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 13 07:00:52 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: David Sherwood (david-arm)
<details>
<summary>Changes</summary>
This first patch changes VPRegionBlock to support both SESE (single-entry-single-exit) and SEME (single-entry-multiple-exit) CFGs. Currently it only permits a maximum of two successors, i.e. one normal exit from the vector latch to the middle block successor, and an early exit from a non-latch block. This is in preparation for PR #<!-- -->88385, which will start vectorising more simple cases of early exit loops. In the original implementation of PR #<!-- -->88385 I wasn't really modelling the early exit correctly in the CFG, and this patch is the first step to do this properly.
I've modelled this by adding new EarlyExiting block to the VPRegionBlock class, which for normal loops this value is null. I also added a new getNumExitingBlocks method to VPRegionBlock, which returns either 1 or 2, depending upon whether EarlyExiting is null or not. When verifying the vplan we assert that the number of successors matches the number of exiting blocks. This patch also ensures the VPlanTransforms::optimize() handles the new CFG sensibly, for example it has to be careful not to merge the latch block into a predecessor that is an early exiting block.
If we assume that the middle block is always the first successor, then we don't need to maintain a map between successors and exiting blocks.
The only way that I can test the CFG right now is via unit tests, which I've added to
unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
---
Full diff: https://github.com/llvm/llvm-project/pull/108563.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+9-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+37-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+16-1)
- (modified) llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp (+133)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 316468651df1ab..5da8c320519605 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -628,7 +628,11 @@ const VPRecipeBase *VPBasicBlock::getTerminator() const {
}
bool VPBasicBlock::isExiting() const {
- return getParent() && getParent()->getExitingBasicBlock() == this;
+ const VPRegionBlock *VPRB = getParent();
+ if (!VPRB)
+ return false;
+ return VPRB->getExitingBasicBlock() == this ||
+ VPRB->getEarlyExiting() == this;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -989,6 +993,9 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
/// Assumes a single pre-header basic-block was created for this. Introduce
/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ assert(!getVectorLoopRegion()->getEarlyExiting() &&
+ "Executing vplans with an early exit not yet supported");
+
// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
@@ -1007,6 +1014,7 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *MiddleBB = State->CFG.ExitBB;
VPBasicBlock *MiddleVPBB =
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+
// Find the VPBB for the scalar preheader, relying on the current structure
// when creating the middle block and its successrs: if there's a single
// predecessor, it must be the scalar preheader. Otherwise, the second
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 64242e43c56bc8..8c63156b042f3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3236,21 +3236,33 @@ class VPIRBasicBlock : public VPBasicBlock {
};
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
-/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
+/// which form a Single-Entry-Single-Exiting or Single-Entry-Multiple-Exiting
+/// subgraph of the output IR CFG. For the multiple-exiting case only a total
+/// of two exits are currently supported and the early exit is tracked
+/// separately. The first successor should always correspond to the normal
+/// exiting block, i.e. vector latch -> middle.block. An optional second
+/// successor corresponds to the early exit.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
+/// TODO: The SEME case is a work in progress and any attempt to execute a
+/// VPlan containing a region with multiple exits will assert.
class VPRegionBlock : public VPBlockBase {
- /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
+ /// Hold the Single Entry of the SESE/SEME region modelled by the
+ /// VPRegionBlock.
VPBlockBase *Entry;
- /// Hold the Single Exiting block of the SESE region modelled by the
+ /// Hold the normal Exiting block of the SESE/SEME region modelled by the
/// VPRegionBlock.
VPBlockBase *Exiting;
+ /// Hold the Early Exiting block of the SEME region. If this is a SESE region
+ /// this value should be nullptr.
+ VPBlockBase *EarlyExiting;
+
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
bool IsReplicator;
@@ -3259,7 +3271,7 @@ class VPRegionBlock : public VPBlockBase {
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
const std::string &Name = "", bool IsReplicator = false)
: VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
- IsReplicator(IsReplicator) {
+ EarlyExiting(nullptr), IsReplicator(IsReplicator) {
assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
Entry->setParent(this);
@@ -3267,7 +3279,7 @@ class VPRegionBlock : public VPBlockBase {
}
VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
: VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
- IsReplicator(IsReplicator) {}
+ EarlyExiting(nullptr), IsReplicator(IsReplicator) {}
~VPRegionBlock() override {
if (Entry) {
@@ -3297,8 +3309,8 @@ class VPRegionBlock : public VPBlockBase {
const VPBlockBase *getExiting() const { return Exiting; }
VPBlockBase *getExiting() { return Exiting; }
- /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
- /// ExitingBlock must have no successors.
+ /// Set \p ExitingBlock as the normal exiting VPBlockBase of this
+ /// VPRegionBlock. \p ExitingBlock must have no successors.
void setExiting(VPBlockBase *ExitingBlock) {
assert(ExitingBlock->getSuccessors().empty() &&
"Exit block cannot have successors.");
@@ -3306,6 +3318,24 @@ class VPRegionBlock : public VPBlockBase {
ExitingBlock->setParent(this);
}
+ /// Set \p EarlyExitingBlock as the early exiting VPBlockBase of this
+ /// VPRegionBlock. \p EarlyExitingBlock must have a successor, since
+ /// it cannot be the latch.
+ void setEarlyExiting(VPBlockBase *EarlyExitingBlock) {
+ assert(EarlyExitingBlock->getNumSuccessors() == 1 &&
+ "Early exit block must have a successor.");
+ assert(EarlyExitingBlock->getParent() == this &&
+ "Early exit block should already be in loop region");
+ EarlyExiting = EarlyExitingBlock;
+ }
+
+ const VPBlockBase *getEarlyExiting() const { return EarlyExiting; }
+ VPBlockBase *getEarlyExiting() { return EarlyExiting; }
+
+ /// Return the number of exiting blocks from this region. It should match
+ /// the number of successors.
+ unsigned getNumExitingBlocks() const { return EarlyExiting ? 2 : 1; }
+
/// Returns the pre-header VPBasicBlock of the loop region.
VPBasicBlock *getPreheaderVPBB() {
assert(!isReplicator() && "should only get pre-header of loop regions");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index dfddb5b45f623d..2f394eeaa544ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -186,7 +186,9 @@ static bool hasDuplicates(const SmallVectorImpl<VPBlockBase *> &VPBlockVec) {
bool VPlanVerifier::verifyBlock(const VPBlockBase *VPB) {
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
// Check block's condition bit.
- if (VPB->getNumSuccessors() > 1 ||
+ // NOTE: A VPRegionBlock can legally have multiple successors due to
+ // early exits from the loop.
+ if ((VPB->getNumSuccessors() > 1 && !isa<VPRegionBlock>(VPB)) ||
(VPBB && VPBB->getParent() && VPBB->isExiting() &&
!VPBB->getParent()->isReplicator())) {
if (!VPBB || !VPBB->getTerminator()) {
@@ -210,6 +212,19 @@ bool VPlanVerifier::verifyBlock(const VPBlockBase *VPB) {
return false;
}
+ // If this is a loop region with multiple successors it must have as many
+ // exiting blocks as successors, even if the original scalar loop only had a
+ // single exit block. That's because in the vector loop we always create a
+ // middle block for the vector latch exit, which is distinct from the early
+ // exit.
+ auto *VPRB = dyn_cast<VPRegionBlock>(VPB);
+ if (VPRB && VPRB->getNumExitingBlocks() != VPB->getNumSuccessors()) {
+ errs() << "Number of exiting blocks (" << VPRB->getNumExitingBlocks()
+ << ") does not match number of successors ("
+ << VPB->getNumSuccessors() << ")!\n";
+ return false;
+ }
+
for (const VPBlockBase *Succ : Successors) {
// There must be a bi-directional link between block and successor.
const auto &SuccPreds = Succ->getPredecessors();
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index 9958d6ea124f81..95525484a3f4a8 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -8,6 +8,7 @@
#include "../lib/Transforms/Vectorize/VPlanVerifier.h"
#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "gtest/gtest.h"
@@ -28,6 +29,10 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
VPBasicBlock *VPBB2 = new VPBasicBlock();
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);
+
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+
VPlan Plan(VPPH, &*TC, VPBB1);
#if GTEST_HAS_STREAM_REDIRECTION
@@ -59,6 +64,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+
auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);
@@ -102,6 +110,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+
auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);
@@ -138,6 +149,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(VPBB1, R1);
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+
auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);
@@ -175,6 +189,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+
auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);
@@ -204,6 +221,9 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB1->setParent(R1);
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+
auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);
@@ -217,4 +237,117 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
#endif
}
+TEST(VPVerifierTest, LoopRegionMultipleSuccessors1) {
+ VPInstruction *TC = new VPInstruction(Instruction::Add, {});
+ VPBasicBlock *VPBBPH = new VPBasicBlock("preheader");
+ VPBBPH->appendRecipe(TC);
+
+ VPInstruction *TC2 = new VPInstruction(Instruction::Add, {});
+ VPBasicBlock *VPBBENTRY = new VPBasicBlock("entry");
+ VPBBENTRY->appendRecipe(TC2);
+
+ // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
+ auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(TC2, {});
+ VPInstruction *I1 = new VPInstruction(Instruction::Add, {});
+ VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1});
+ VPInstruction *I3 = new VPInstruction(VPInstruction::BranchOnCond, {I1});
+
+ VPBasicBlock *RBB1 = new VPBasicBlock();
+ RBB1->appendRecipe(CanonicalIVPHI);
+ RBB1->appendRecipe(I1);
+ RBB1->appendRecipe(I2);
+ RBB1->appendRecipe(I3);
+ RBB1->setName("bb1");
+
+ VPInstruction *I4 = new VPInstruction(Instruction::Mul, {I2, I1});
+ VPInstruction *I5 = new VPInstruction(VPInstruction::BranchOnCond, {I4});
+ VPBasicBlock *RBB2 = new VPBasicBlock();
+ RBB2->appendRecipe(I4);
+ RBB2->appendRecipe(I5);
+ RBB2->setName("bb2");
+
+ VPRegionBlock *R1 = new VPRegionBlock(RBB1, RBB2, "R1");
+ VPBlockUtils::connectBlocks(RBB1, RBB2);
+ VPBlockUtils::connectBlocks(VPBBENTRY, R1);
+
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBasicBlock *VPEARLYEXIT = new VPBasicBlock("early.exit");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+ VPBlockUtils::connectBlocks(R1, VPEARLYEXIT);
+
+ VPlan Plan(VPBBPH, TC, VPBBENTRY);
+ Plan.setName("TestPlan");
+ Plan.addVF(ElementCount::getFixed(4));
+ Plan.getVectorLoopRegion()->setEarlyExiting(RBB1);
+
+ EXPECT_TRUE(verifyVPlanIsValid(Plan));
+}
+
+TEST(VPVerifierTest, LoopRegionMultipleSuccessors2) {
+ VPInstruction *TC = new VPInstruction(Instruction::Add, {});
+ VPBasicBlock *VPBBPH = new VPBasicBlock("preheader");
+ VPBBPH->appendRecipe(TC);
+
+ VPInstruction *TC2 = new VPInstruction(Instruction::Add, {});
+ VPBasicBlock *VPBBENTRY = new VPBasicBlock("entry");
+ VPBBENTRY->appendRecipe(TC2);
+
+ // We can't create a live-in without a VPlan, but we can't create
+ // a VPlan without the blocks. So we initialize this to a silly
+ // value here, then fix it up later.
+ auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(TC2, {});
+ VPInstruction *I1 = new VPInstruction(Instruction::Add, {});
+ VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1});
+ VPInstruction *I3 = new VPInstruction(VPInstruction::BranchOnCond, {I1});
+
+ VPBasicBlock *RBB1 = new VPBasicBlock();
+ RBB1->appendRecipe(CanonicalIVPHI);
+ RBB1->appendRecipe(I1);
+ RBB1->appendRecipe(I2);
+ RBB1->appendRecipe(I3);
+ RBB1->setName("vector.body");
+
+ // This really is what the vplan cfg looks like before optimising!
+ VPBasicBlock *RBB2 = new VPBasicBlock();
+ RBB2->setName("loop.inc");
+ // A block that inherits the latch name from the original scalar loop.
+
+ VPBasicBlock *RBB3 = new VPBasicBlock();
+ // No name
+
+ VPInstruction *I4 = new VPInstruction(Instruction::Mul, {I2, I1});
+ VPInstruction *I5 = new VPInstruction(VPInstruction::BranchOnCond, {I4});
+ VPBasicBlock *RBB4 = new VPBasicBlock();
+ RBB4->appendRecipe(I4);
+ RBB4->appendRecipe(I5);
+ RBB4->setName("vector.latch");
+
+ VPRegionBlock *R1 = new VPRegionBlock(RBB1, RBB4, "R1");
+ VPBlockUtils::insertBlockAfter(RBB2, RBB1);
+ VPBlockUtils::insertBlockAfter(RBB3, RBB2);
+ VPBlockUtils::insertBlockAfter(RBB4, RBB3);
+ VPBlockUtils::connectBlocks(VPBBENTRY, R1);
+
+ VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
+ VPBasicBlock *VPEARLYEXIT = new VPBasicBlock("early.exit");
+ VPBlockUtils::connectBlocks(R1, VPMIDDLE);
+ VPBlockUtils::connectBlocks(R1, VPEARLYEXIT);
+
+ VPlan Plan(VPBBPH, TC, VPBBENTRY);
+ Plan.setName("TestPlan");
+ Plan.addVF(ElementCount::getFixed(4));
+ Plan.getVectorLoopRegion()->setEarlyExiting(RBB1);
+
+ // Update the VPCanonicalIVPHIRecipe to have a live-in IR value.
+ LLVMContext C;
+ IntegerType *Int32 = IntegerType::get(C, 32);
+ Value *StartIV = PoisonValue::get(Int32);
+ CanonicalIVPHI->setStartValue(Plan.getOrAddLiveIn(StartIV));
+
+ EXPECT_TRUE(verifyVPlanIsValid(Plan));
+
+ VPlanTransforms::optimize(Plan, C);
+ EXPECT_TRUE(verifyVPlanIsValid(Plan));
+}
+
} // namespace
``````````
</details>
https://github.com/llvm/llvm-project/pull/108563
More information about the llvm-commits
mailing list