[llvm] 9d297c7 - [VPlan] Add prepareToExecute to set up live-ins (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 28 08:51:07 PST 2021
Author: Florian Hahn
Date: 2021-12-28T17:49:47+01:00
New Revision: 9d297c7894ecae3d0ab18b6dac4c2df0bc489951
URL: https://github.com/llvm/llvm-project/commit/9d297c7894ecae3d0ab18b6dac4c2df0bc489951
DIFF: https://github.com/llvm/llvm-project/commit/9d297c7894ecae3d0ab18b6dac4c2df0bc489951.diff
LOG: [VPlan] Add prepareToExecute to set up live-ins (NFC).
This patch adds a new prepareToExecute helper to set up live-ins, so
VPTransformState doesn't need to hold values like TripCount.
This also requires making the trip count operand for ActiveLaneMask
explicit in VPlan.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D116320
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ef49ae529cec..112e697c7f5e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7962,7 +7962,6 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
- State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
ILV.collectPoisonGeneratingRecipes(State);
@@ -7977,6 +7976,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
//===------------------------------------------------===//
// 2. Copy and widen instructions from the old loop into the new loop.
+ BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), State);
BestVPlan.execute(&State);
// 3. Fix the vectorized code: take care of header phi's, live-outs,
@@ -8461,11 +8461,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
bool TailFolded = !CM.isScalarEpilogueAllowed();
if (TailFolded && CM.TTI.emitGetActiveLaneMask()) {
- // While ActiveLaneMask is a binary op that consumes the loop tripcount
- // as a second argument, we only pass the IV here and extract the
- // tripcount from the transform state where codegen of the VP instructions
- // happen.
- BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV});
+ VPValue *TC = Plan->getOrCreateTripCount();
+ BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC});
} else {
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index bebc6bbdd4a7..e4517a47e7c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -677,7 +677,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
// Get first lane of vector induction variable.
Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
// Get the original loop tripcount.
- Value *ScalarTC = State.TripCount;
+ Value *ScalarTC = State.get(getOperand(1), Part);
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
@@ -786,23 +786,31 @@ void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
FMF = FMFNew;
}
-/// Generate the code inside the body of the vectorized loop. Assumes a single
-/// LoopVectorBody basic-block was created for this. Introduce additional
-/// basic-blocks as needed, and fill them all.
-void VPlan::execute(VPTransformState *State) {
- // -1. Check if the backedge taken count is needed, and if so build it.
+void VPlan::prepareToExecute(Value *TripCountV, VPTransformState &State) {
+ // Check if the trip count is needed, and if so build it.
+ if (TripCount && TripCount->getNumUsers()) {
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
+ State.set(TripCount, TripCountV, Part);
+ }
+
+ // Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
- Value *TC = State->TripCount;
- IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
- auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
+ IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
+ auto *TCMO = Builder.CreateSub(TripCountV,
+ ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
- auto VF = State->VF;
+ auto VF = State.VF;
Value *VTCMO =
VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
- for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part)
- State->set(BackedgeTakenCount, VTCMO, Part);
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
+ State.set(BackedgeTakenCount, VTCMO, Part);
}
+}
+/// Generate the code inside the body of the vectorized loop. Assumes a single
+/// LoopVectorBody basic-block was created for this. Introduce additional
+/// basic-blocks as needed, and fill them all.
+void VPlan::execute(VPTransformState *State) {
// 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 7fa3c1defaca..96de9114b618 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -341,9 +341,6 @@ struct VPTransformState {
/// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
Value *CanonicalIV = nullptr;
- /// Hold the trip count of the scalar loop.
- Value *TripCount = nullptr;
-
/// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
InnerLoopVectorizer *ILV;
@@ -2134,8 +2131,12 @@ class VPlan {
// (operators '==' and '<').
SetVector<VPValue *> VPExternalDefs;
- /// Represents the backedge taken count of the original loop, for folding
+ /// Represents the trip count of the original loop, for folding
/// the tail.
+ VPValue *TripCount = nullptr;
+
+ /// Represents the backedge taken count of the original loop, for folding
+ /// the tail. It equals TripCount - 1.
VPValue *BackedgeTakenCount = nullptr;
/// Holds a mapping between Values and their corresponding VPValue inside
@@ -2169,12 +2170,17 @@ class VPlan {
}
for (VPValue *VPV : VPValuesToFree)
delete VPV;
+ if (TripCount)
+ delete TripCount;
if (BackedgeTakenCount)
delete BackedgeTakenCount;
for (VPValue *Def : VPExternalDefs)
delete Def;
}
+ /// Prepare the plan for execution, setting up the required live-in values.
+ void prepareToExecute(Value *TripCount, VPTransformState &State);
+
/// Generate the IR code for this VPlan.
void execute(struct VPTransformState *State);
@@ -2187,6 +2193,13 @@ class VPlan {
return Entry;
}
+ /// The trip count of the original loop.
+ VPValue *getOrCreateTripCount() {
+ if (!TripCount)
+ TripCount = new VPValue();
+ return TripCount;
+ }
+
/// The backedge taken count of the original loop.
VPValue *getOrCreateBackedgeTakenCount() {
if (!BackedgeTakenCount)
More information about the llvm-commits mailing list