[llvm] 09a29fc - [VPlan] Don't collect live-ins in collectUsersInExitBlocks. (NFC) (#123819)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 08:12:10 PST 2025
Author: Florian Hahn
Date: 2025-01-27T16:12:07Z
New Revision: 09a29fcc8dbbb2cc1c0fdf26ef4f8fafab4e03d9
URL: https://github.com/llvm/llvm-project/commit/09a29fcc8dbbb2cc1c0fdf26ef4f8fafab4e03d9
DIFF: https://github.com/llvm/llvm-project/commit/09a29fcc8dbbb2cc1c0fdf26ef4f8fafab4e03d9.diff
LOG: [VPlan] Don't collect live-ins in collectUsersInExitBlocks. (NFC) (#123819)
Live-ins don't need to be handled, other than adding to the exit phi
recipe. Do that early and assert that otherwise the exit value is
defined in the vector loop region.
This should enable simply skipping other exit values that do not need
further fixing, e.g. if handling the exit value from the early exit
directly in handleUncountableEarlyExit.
PR: https://github.com/llvm/llvm-project/pull/123819
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3a4f637f177e19..f592e5557c17d5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9033,7 +9033,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
static SetVector<VPIRInstruction *>
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
VPlan &Plan) {
- auto *MiddleVPBB = Plan.getMiddleBlock();
SetVector<VPIRInstruction *> ExitUsersToFix;
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : *ExitVPBB) {
@@ -9043,33 +9042,33 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
if (!ExitPhi)
break;
- for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) {
- BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
- if (PredVPBB != MiddleVPBB) {
- SmallVector<BasicBlock *> ExitingBlocks;
- OrigLoop->getExitingBlocks(ExitingBlocks);
- assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks");
- ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1]
- : ExitingBlocks[0];
- }
- Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
- VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
- ExitUsersToFix.insert(ExitIRI);
- ExitIRI->addOperand(V);
+ if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) {
+ assert(ExitIRI->getNumOperands() ==
+ ExitVPBB->getPredecessors().size() &&
+ "early-exit must update exit values on construction");
+ continue;
}
+ BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
+ Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
+ VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
+ ExitIRI->addOperand(V);
+ if (V->isLiveIn())
+ continue;
+ assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
+ "Only recipes defined inside a region should need fixing.");
+ ExitUsersToFix.insert(ExitIRI);
}
}
return ExitUsersToFix;
}
// Add exit values to \p Plan. Extracts are added for each entry in \p
-// ExitUsersToFix if needed and their operands are updated. Returns true if all
-// exit users can be handled, otherwise return false.
-static bool
+// ExitUsersToFix if needed and their operands are updated.
+static void
addUsersInExitBlocks(VPlan &Plan,
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
if (ExitUsersToFix.empty())
- return true;
+ return;
auto *MiddleVPBB = Plan.getMiddleBlock();
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
@@ -9078,25 +9077,12 @@ addUsersInExitBlocks(VPlan &Plan,
// Introduce extract for exiting values and update the VPIRInstructions
// modeling the corresponding LCSSA phis.
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
- for (const auto &[Idx, Op] : enumerate(ExitIRI->operands())) {
- // Pass live-in values used by exit phis directly through to their users
- // in the exit block.
- if (Op->isLiveIn())
- continue;
-
- // Currently only live-ins can be used by exit values from blocks not
- // exiting via the vector latch through to the middle block.
- if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
- return false;
-
- LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
- VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
- {Op, Plan.getOrAddLiveIn(ConstantInt::get(
- IntegerType::get(Ctx, 32), 1))});
- ExitIRI->setOperand(Idx, Ext);
- }
+ assert(ExitIRI->getNumOperands() == 1 &&
+ ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
+ "exit values from early exits must be fixed when branch to "
+ "early-exit is added");
+ ExitIRI->extractLastLaneOfOperand(B);
}
- return true;
}
/// Handle users in the exit block for first order reductions in the original
@@ -9392,20 +9378,21 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
if (auto *UncountableExitingBlock =
Legal->getUncountableEarlyExitingBlock()) {
- VPlanTransforms::handleUncountableEarlyExit(
- *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
+ if (!VPlanTransforms::handleUncountableEarlyExit(
+ *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock,
+ RecipeBuilder)) {
+ reportVectorizationFailure(
+ "Some exit values in loop with uncountable exit not supported yet",
+ "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
+ return nullptr;
+ }
}
DenseMap<VPValue *, VPValue *> IVEndValues;
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
SetVector<VPIRInstruction *> ExitUsersToFix =
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
- reportVectorizationFailure(
- "Some exit values in loop with uncountable exit not supported yet",
- "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
- return nullptr;
- }
+ addUsersInExitBlocks(*Plan, ExitUsersToFix);
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9124905c997176..253f22d299b623 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -60,6 +60,7 @@ class RecurrenceDescriptor;
class SCEV;
class Type;
class VPBasicBlock;
+class VPBuilder;
class VPRegionBlock;
class VPlan;
class VPReplicateRecipe;
@@ -1422,6 +1423,11 @@ class VPIRInstruction : public VPRecipeBase {
"Op must be an operand of the recipe");
return true;
}
+
+ /// Update the recipes single operand to the last lane of the operand using \p
+ /// Builder. Must only be used for single operand VPIRInstructions wrapping a
+ /// PHINode.
+ void extractLastLaneOfOperand(VPBuilder &Builder);
};
/// VPWidenRecipe is a recipe for producing a widened instruction using the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa30eccdbd734b..2ae539c98fef4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -11,6 +11,7 @@
///
//===----------------------------------------------------------------------===//
+#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanPatternMatch.h"
@@ -937,6 +938,22 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF,
return 0;
}
+void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
+ assert(isa<PHINode>(getInstruction()) &&
+ "can only add exiting operands to phi nodes");
+ assert(getNumOperands() == 1 && "must have a single operand");
+ VPValue *Exiting = getOperand(0);
+ if (!Exiting->isLiveIn()) {
+ LLVMContext &Ctx = getInstruction().getContext();
+ auto &Plan = *getParent()->getPlan();
+ Exiting = Builder.createNaryOp(
+ VPInstruction::ExtractFromEnd,
+ {Exiting,
+ Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});
+ }
+ setOperand(0, Exiting);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9febd612c644e1..714250a56ff576 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2062,7 +2062,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
}
}
-void VPlanTransforms::handleUncountableEarlyExit(
+bool VPlanTransforms::handleUncountableEarlyExit(
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
@@ -2103,7 +2103,32 @@ void VPlanTransforms::handleUncountableEarlyExit(
VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
NewMiddle->swapSuccessors();
+ // Update the exit phis in the early exit block.
VPBuilder MiddleBuilder(NewMiddle);
+ for (VPRecipeBase &R : *VPEarlyExitBlock) {
+ auto *ExitIRI = cast<VPIRInstruction>(&R);
+ auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
+ if (!ExitPhi)
+ break;
+
+ VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
+ ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
+ // The incoming value from the early exit must be a live-in for now.
+ if (!IncomingFromEarlyExit->isLiveIn())
+ return false;
+
+ if (OrigLoop->getUniqueExitBlock()) {
+ // If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
+ // (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB
+ // which is coming from the original latch.
+ VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn(
+ ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+ ExitIRI->addOperand(IncomingFromLatch);
+ ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
+ }
+ // Add the incoming value from the early exit.
+ ExitIRI->addOperand(IncomingFromEarlyExit);
+ }
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
// Replace the condition controlling the non-early exit from the vector loop
@@ -2119,4 +2144,5 @@ void VPlanTransforms::handleUncountableEarlyExit(
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
LatchExitingBranch->eraseFromParent();
+ return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index a751b8b5e8dc59..b31fef5d62456b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -130,7 +130,7 @@ struct VPlanTransforms {
/// exit conditions
/// * splitting the original middle block to branch to the early exit block
/// if taken.
- static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
+ static bool handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
Loop *OrigLoop,
BasicBlock *UncountableExitingBlock,
VPRecipeBuilder &RecipeBuilder);
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 6e542bd873b8c3..56d0871feacd39 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -217,21 +217,50 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 67, [[MIDDLE_SPLIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -548,7 +577,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -568,7 +597,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -1029,4 +1058,6 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
;.
More information about the llvm-commits
mailing list