[llvm] 06c3a7d - [VPlan] Remove unneeded State.UF after 8ec406757cb92 (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 22 12:42:58 PDT 2024
Author: Florian Hahn
Date: 2024-09-22T20:42:37+01:00
New Revision: 06c3a7d2d764fc605ef933b2f805f41f2fcaee71
URL: https://github.com/llvm/llvm-project/commit/06c3a7d2d764fc605ef933b2f805f41f2fcaee71
DIFF: https://github.com/llvm/llvm-project/commit/06c3a7d2d764fc605ef933b2f805f41f2fcaee71.diff
LOG: [VPlan] Remove unneeded State.UF after 8ec406757cb92 (NFC).
State.UF is no longer needed after 8ec406757cb92
(https://github.com/llvm/llvm-project/pull/95842). Clean it up,
simplifying ::execute of existing recipes.
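To illustrate the shape of the simplification (a minimal sketch with mock types, not the actual VPlan classes or APIs): before the VPlan-based unroller, each recipe's execute() looped over State.UF and emitted one value per unroll part; now the plan is unrolled up front, so each recipe emits exactly one value and the per-part loop and State.UF can be dropped.

```cpp
// Illustrative sketch only; MockState, executeWithUF and executeUnrolled are
// hypothetical stand-ins for VPTransformState and a recipe's ::execute.
#include <cstdio>
#include <vector>

struct MockState {
  std::vector<int> Values; // stand-in for per-part generated IR values
  void set(int V) { Values.push_back(V); }
};

// Old shape: one recipe generates a value for every unroll part.
void executeWithUF(MockState &State, unsigned UF, int Operand) {
  for (unsigned Part = 0; Part < UF; ++Part)
    State.set(Operand + static_cast<int>(Part)); // one value per part
}

// New shape: the plan was already unrolled, so emit a single value (part 0).
void executeUnrolled(MockState &State, int Operand) {
  State.set(Operand);
}

int main() {
  MockState Old, New;
  executeWithUF(Old, /*UF=*/4, 10); // four values from one recipe
  for (int I = 0; I < 4; ++I)
    executeUnrolled(New, 10 + I);   // four unrolled recipes, one value each
  std::printf("old: %zu values, new: %zu values\n", Old.Values.size(),
              New.Values.size());
}
```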
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b5c8763b4f66ad..9685e7d124b7d1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7440,7 +7440,7 @@ static void createAndCollectMergePhiForReduction(
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
Value *FinalValue =
- State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
+ State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
auto *ResumePhi =
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -9453,24 +9453,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
}
if (IsUniform) {
- // If the recipe is uniform across all parts (instead of just per VF), only
- // generate a single instance.
- if ((isa<LoadInst>(UI) || isa<StoreInst>(UI)) &&
- all_of(operands(),
- [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) {
- State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
- if (user_begin() != user_end()) {
- for (unsigned Part = 1; Part < State.UF; ++Part)
- State.set(this, State.get(this, VPIteration(0, 0)),
- VPIteration(Part, 0));
- }
- return;
- }
-
- // Uniform within VL means we need to generate lane 0 only for each
- // unrolled copy.
- for (unsigned Part = 0; Part < State.UF; ++Part)
- State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0), State);
+ // Uniform within VL means we need to generate lane 0.
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
return;
}
@@ -9479,17 +9463,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
if (isa<StoreInst>(UI) &&
vputils::isUniformAfterVectorization(getOperand(1))) {
auto Lane = VPLane::getLastLaneForVF(State.VF);
- State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
- State);
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
return;
}
// Generate scalar instances for all VF lanes of all UF parts.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
const unsigned EndLane = State.VF.getKnownMinValue();
- for (unsigned Part = 0; Part < State.UF; ++Part)
- for (unsigned Lane = 0; Lane < EndLane; ++Lane)
- State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
+ for (unsigned Lane = 0; Lane < EndLane; ++Lane)
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
}
// Determine how to lower the scalar epilogue, which depends on 1) optimising
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b0a637012037d6..9a0c353edd136f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -225,7 +225,7 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
DominatorTree *DT, IRBuilderBase &Builder,
InnerLoopVectorizer *ILV, VPlan *Plan)
- : VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
+ : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
@@ -772,9 +772,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
// Enter replicating mode.
State->Instance = VPIteration(0, 0);
-
- for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
- State->Instance->Part = Part;
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
@@ -784,7 +781,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
- }
}
// Exit replicating mode.
@@ -963,16 +959,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
+ unsigned UF = getUF();
if (VF.getNumUsers()) {
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
VF.setUnderlyingValue(RuntimeVF);
VFxUF.setUnderlyingValue(
- State.UF > 1
- ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, State.UF))
- : RuntimeVF);
+ UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
+ : RuntimeVF);
} else {
- VFxUF.setUnderlyingValue(
- createStepForVF(Builder, TCTy, State.VF, State.UF));
+ VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
}
// When vectorizing the epilogue loop, the canonical induction start value
@@ -1019,10 +1014,6 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
/// Assumes a single pre-header basic-block was created for this. Introduce
/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
- // Set UF to 1, as the unrollByUF VPlan transform already explicitly unrolled
- // the VPlan.
- // TODO: Remove State::UF and all uses.
- State->UF = 1;
// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
@@ -1106,28 +1097,13 @@ void VPlan::execute(VPTransformState *State) {
}
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
- // For canonical IV, first-order recurrences and in-order reduction phis,
- // only a single part is generated, which provides the last part from the
- // previous iteration. For non-ordered reductions all UF parts are
- // generated.
- bool SinglePartNeeded =
- isa<VPCanonicalIVPHIRecipe>(PhiR) ||
- isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
bool NeedsScalar =
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
- unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
-
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *Phi = State->get(PhiR, Part, NeedsScalar);
- Value *Val =
- State->get(PhiR->getBackedgeValue(),
- SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
- cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
- }
+ Value *Phi = State->get(PhiR, 0, NeedsScalar);
+ Value *Val = State->get(PhiR->getBackedgeValue(), 0, NeedsScalar);
+ cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
State->CFG.DTU.flush();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a29d972ca1d16d..fda0a8907b4ab0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -256,7 +256,6 @@ struct VPTransformState {
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
ElementCount VF;
- unsigned UF;
/// Hold the indices to generate specific scalar instructions. Null indicates
/// that all instances are to be generated, using either scalar or vector
@@ -309,7 +308,7 @@ struct VPTransformState {
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
"scalar values must be stored as (Part, 0)");
if (!Data.PerPartOutput.count(Def)) {
- DataState::PerPartValuesTy Entry(UF);
+ DataState::PerPartValuesTy Entry(1);
Data.PerPartOutput[Def] = Entry;
}
Data.PerPartOutput[Def][Part] = V;
@@ -1306,11 +1305,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// needed.
bool canGenerateScalarForFirstLane() const;
- /// Utility methods serving execute(): generates a single instance of the
- /// modeled instruction for a given part. \returns the generated value for \p
- /// Part. In some cases an existing value is returned rather than a generated
- /// one.
- Value *generatePerPart(VPTransformState &State, unsigned Part);
+ /// Utility methods serving execute(): generates a single vector instance of
+ /// the modeled instruction. \returns the generated value. In some cases an
+ /// existing value is returned rather than a generated one.
+ Value *generate(VPTransformState &State);
/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
@@ -1616,7 +1614,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
Type *ResultTy;
- Value *generate(VPTransformState &State, unsigned Part);
+ Value *generate(VPTransformState &State);
public:
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 255b8b03ae0db9..8f4b2951839118 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -399,13 +399,13 @@ Value *VPInstruction::generatePerLane(VPTransformState &State,
State.get(getOperand(1), Lane), Name);
}
-Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
+Value *VPInstruction::generate(VPTransformState &State) {
IRBuilderBase &Builder = State.Builder;
if (Instruction::isBinaryOp(getOpcode())) {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
- Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
- Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
+ Value *A = State.get(getOperand(0), 0, OnlyFirstLaneUsed);
+ Value *B = State.get(getOperand(1), 0, OnlyFirstLaneUsed);
auto *Res =
Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
if (auto *I = dyn_cast<Instruction>(Res))
@@ -415,26 +415,26 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
switch (getOpcode()) {
case VPInstruction::Not: {
- Value *A = State.get(getOperand(0), Part);
+ Value *A = State.get(getOperand(0), 0);
return Builder.CreateNot(A, Name);
}
case Instruction::ICmp: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
- Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
- Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
+ Value *A = State.get(getOperand(0), 0, OnlyFirstLaneUsed);
+ Value *B = State.get(getOperand(1), 0, OnlyFirstLaneUsed);
return Builder.CreateCmp(getPredicate(), A, B, Name);
}
case Instruction::Select: {
- Value *Cond = State.get(getOperand(0), Part);
- Value *Op1 = State.get(getOperand(1), Part);
- Value *Op2 = State.get(getOperand(2), Part);
+ Value *Cond = State.get(getOperand(0), 0);
+ Value *Op1 = State.get(getOperand(1), 0);
+ Value *Op2 = State.get(getOperand(2), 0);
return Builder.CreateSelect(Cond, Op1, Op2, Name);
}
case VPInstruction::ActiveLaneMask: {
// Get first lane of vector induction variable.
- Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
+ Value *VIVElem0 = State.get(getOperand(0), VPIteration(0, 0));
// Get the original loop tripcount.
- Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
+ Value *ScalarTC = State.get(getOperand(1), VPIteration(0, 0));
// If this part of the active lane mask is scalar, generate the CMP directly
// to avoid unnecessary extracts.
@@ -461,18 +461,13 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
// v2 = a[i, i+1, i+2, i+3];
// v3 = vector(v1(3), v2(0, 1, 2))
- // For the first part, use the recurrence phi (v1), otherwise v2.
auto *V1 = State.get(getOperand(0), 0);
- Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
- if (!PartMinus1->getType()->isVectorTy())
- return PartMinus1;
- Value *V2 = State.get(getOperand(1), Part);
- return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
+ if (!V1->getType()->isVectorTy())
+ return V1;
+ Value *V2 = State.get(getOperand(1), 0);
+ return Builder.CreateVectorSplice(V1, V2, -1, Name);
}
case VPInstruction::CalculateTripCountMinusVF: {
- if (Part != 0)
- return State.get(this, 0, /*IsScalar*/ true);
-
unsigned UF = getParent()->getPlan()->getUF();
Value *ScalarTC = State.get(getOperand(0), {0, 0});
Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
@@ -498,7 +493,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
};
// TODO: Restructure this code with an explicit remainder loop, vsetvli can
// be outside of the main loop.
- assert(Part == 0 && "No unrolling expected for predicated vectorization.");
// Compute VTC - IV as the AVL (requested vector length).
Value *Index = State.get(getOperand(0), VPIteration(0, 0));
Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
@@ -517,10 +511,7 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
hasNoSignedWrap());
}
case VPInstruction::BranchOnCond: {
- if (Part != 0)
- return nullptr;
-
- Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
+ Value *Cond = State.get(getOperand(0), VPIteration(0, 0));
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination for exiting blocks now and
// to forward destination(s) later when they are created.
@@ -538,11 +529,9 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
return CondBr;
}
case VPInstruction::BranchOnCount: {
- if (Part != 0)
- return nullptr;
// First create the compare.
- Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
- Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
+ Value *IV = State.get(getOperand(0), 0, /*IsScalar*/ true);
+ Value *TC = State.get(getOperand(1), 0, /*IsScalar*/ true);
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
@@ -636,9 +625,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
return ReducedPartRdx;
}
case VPInstruction::ExtractFromEnd: {
- if (Part != 0)
- return State.get(this, 0, /*IsScalar*/ true);
-
auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
unsigned Offset = CI->getZExtValue();
assert(Offset > 0 && "Offset from end must be positive");
@@ -647,37 +633,34 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
assert(Offset <= State.VF.getKnownMinValue() &&
"invalid offset to extract from");
// Extract lane VF - Offset from the operand.
- Res = State.get(
- getOperand(0),
- VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
+ Res = State.get(getOperand(0),
+ VPIteration(0, VPLane::getLaneFromEnd(State.VF, Offset)));
} else {
- assert(Offset <= State.UF && "invalid offset to extract from");
+ assert(Offset <= 1 && "invalid offset to extract from");
// When loop is unrolled without vectorizing, retrieve UF - Offset.
- Res = State.get(getOperand(0), State.UF - Offset);
+ Res = State.get(getOperand(0), 1 - Offset);
}
if (isa<ExtractElementInst>(Res))
Res->setName(Name);
return Res;
}
case VPInstruction::LogicalAnd: {
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
+ Value *A = State.get(getOperand(0), 0);
+ Value *B = State.get(getOperand(1), 0);
return Builder.CreateLogicalAnd(A, B, Name);
}
case VPInstruction::PtrAdd: {
assert(vputils::onlyFirstLaneUsed(this) &&
"can only generate first lane for PtrAdd");
- Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
- Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
+ Value *Ptr = State.get(getOperand(0), 0, /* IsScalar */ true);
+ Value *Addend = State.get(getOperand(1), 0, /* IsScalar */ true);
return Builder.CreatePtrAdd(Ptr, Addend, Name);
}
case VPInstruction::ResumePhi: {
- if (Part != 0)
- return State.get(this, 0, /*IsScalar*/ true);
Value *IncomingFromVPlanPred =
- State.get(getOperand(0), Part, /* IsScalar */ true);
+ State.get(getOperand(0), 0, /* IsScalar */ true);
Value *IncomingFromOtherPreds =
- State.get(getOperand(1), Part, /* IsScalar */ true);
+ State.get(getOperand(1), 0, /* IsScalar */ true);
auto *NewPhi =
Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
BasicBlock *VPlanPred =
@@ -730,36 +713,26 @@ void VPInstruction::execute(VPTransformState &State) {
(vputils::onlyFirstLaneUsed(this) ||
isVectorToScalar() || isSingleScalar());
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
- bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- if (GeneratesPerAllLanes) {
- for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
- Lane != NumLanes; ++Lane) {
- Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
- assert(GeneratedValue && "generatePerLane must produce a value");
- State.set(this, GeneratedValue, VPIteration(Part, Lane));
- }
- continue;
+ if (GeneratesPerAllLanes) {
+ for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
+ Lane != NumLanes; ++Lane) {
+ Value *GeneratedValue = generatePerLane(State, VPIteration(0, Lane));
+ assert(GeneratedValue && "generatePerLane must produce a value");
+ State.set(this, GeneratedValue, VPIteration(0, Lane));
}
-
- if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
- Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
- State.set(this, Part0, Part,
- /*IsScalar*/ GeneratesPerFirstLaneOnly);
- continue;
- }
-
- Value *GeneratedValue = generatePerPart(State, Part);
- if (!hasResult())
- continue;
- assert(GeneratedValue && "generatePerPart must produce a value");
- assert((GeneratedValue->getType()->isVectorTy() ==
- !GeneratesPerFirstLaneOnly ||
- State.VF.isScalar()) &&
- "scalar value but not only first lane defined");
- State.set(this, GeneratedValue, Part,
- /*IsScalar*/ GeneratesPerFirstLaneOnly);
+ return;
}
+
+ Value *GeneratedValue = generate(State);
+ if (!hasResult())
+ return;
+ assert(GeneratedValue && "generate must produce a value");
+ assert(
+ (GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly ||
+ State.VF.isScalar()) &&
+ "scalar value but not only first lane defined");
+ State.set(this, GeneratedValue, 0,
+ /*IsScalar*/ GeneratesPerFirstLaneOnly);
}
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
@@ -893,7 +866,7 @@ void VPIRInstruction::execute(VPTransformState &State) {
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
- Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
+ Value *V = State.get(ExitValue, VPIteration(0, Lane));
auto *Phi = cast<PHINode>(&I);
Phi->addIncoming(V, PredBB);
}
@@ -928,61 +901,59 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
FunctionType *VFTy = nullptr;
if (Variant)
VFTy = Variant->getFunctionType();
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- SmallVector<Type *, 2> TysForDecl;
- // Add return type if intrinsic is overloaded on it.
+ SmallVector<Type *, 2> TysForDecl;
+ // Add return type if intrinsic is overloaded on it.
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
+ TysForDecl.push_back(VectorType::get(
+ CalledScalarFn->getReturnType()->getScalarType(), State.VF));
+ SmallVector<Value *, 4> Args;
+ for (const auto &I : enumerate(arg_operands())) {
+ // Some intrinsics have a scalar argument - don't replace it with a
+ // vector.
+ Value *Arg;
if (UseIntrinsic &&
- isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
- TysForDecl.push_back(VectorType::get(
- CalledScalarFn->getReturnType()->getScalarType(), State.VF));
- SmallVector<Value *, 4> Args;
- for (const auto &I : enumerate(arg_operands())) {
- // Some intrinsics have a scalar argument - don't replace it with a
- // vector.
- Value *Arg;
- if (UseIntrinsic &&
- isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
- Arg = State.get(I.value(), VPIteration(0, 0));
- // Some vectorized function variants may also take a scalar argument,
- // e.g. linear parameters for pointers. This needs to be the scalar value
- // from the start of the respective part when interleaving.
- else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
- Arg = State.get(I.value(), VPIteration(Part, 0));
- else
- Arg = State.get(I.value(), Part);
- if (UseIntrinsic &&
- isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
- TysForDecl.push_back(Arg->getType());
- Args.push_back(Arg);
- }
+ isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
+ Arg = State.get(I.value(), VPIteration(0, 0));
+ // Some vectorized function variants may also take a scalar argument,
+ // e.g. linear parameters for pointers. This needs to be the scalar value
+ // from the start of the respective part when interleaving.
+ else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
+ Arg = State.get(I.value(), VPIteration(0, 0));
+ else
+ Arg = State.get(I.value(), 0);
+ if (UseIntrinsic &&
+ isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
+ TysForDecl.push_back(Arg->getType());
+ Args.push_back(Arg);
+ }
- Function *VectorF;
- if (UseIntrinsic) {
- // Use vector version of the intrinsic.
- Module *M = State.Builder.GetInsertBlock()->getModule();
- VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
- assert(VectorF && "Can't retrieve vector intrinsic.");
- } else {
+ Function *VectorF;
+ if (UseIntrinsic) {
+ // Use vector version of the intrinsic.
+ Module *M = State.Builder.GetInsertBlock()->getModule();
+ VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
+ assert(VectorF && "Can't retrieve vector intrinsic.");
+ } else {
#ifndef NDEBUG
- assert(Variant != nullptr && "Can't create vector function.");
+ assert(Variant != nullptr && "Can't create vector function.");
#endif
- VectorF = Variant;
- }
+ VectorF = Variant;
+ }
- auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
- SmallVector<OperandBundleDef, 1> OpBundles;
- if (CI)
- CI->getOperandBundlesAsDefs(OpBundles);
+ auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ if (CI)
+ CI->getOperandBundlesAsDefs(OpBundles);
- CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+ CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
- if (isa<FPMathOperator>(V))
- V->copyFastMathFlags(CI);
+ if (isa<FPMathOperator>(V))
+ V->copyFastMathFlags(CI);
- if (!V->getType()->isVoidTy())
- State.set(this, V, Part);
- State.addMetadata(V, CI);
- }
+ if (!V->getType()->isVoidTy())
+ State.set(this, V, 0);
+ State.addMetadata(V, CI);
}
InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
@@ -1085,14 +1056,12 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
auto *InvarCond =
isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
- Value *Op0 = State.get(getOperand(1), Part);
- Value *Op1 = State.get(getOperand(2), Part);
- Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
- State.set(this, Sel, Part);
- State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
- }
+ Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
+ Value *Op0 = State.get(getOperand(1), 0);
+ Value *Op1 = State.get(getOperand(2), 0);
+ Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
+ State.set(this, Sel, 0);
+ State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}
VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
@@ -1175,53 +1144,45 @@ void VPWidenRecipe::execute(VPTransformState &State) {
case Instruction::Or:
case Instruction::Xor: {
// Just widen unops and binops.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- SmallVector<Value *, 2> Ops;
- for (VPValue *VPOp : operands())
- Ops.push_back(State.get(VPOp, Part));
+ SmallVector<Value *, 2> Ops;
+ for (VPValue *VPOp : operands())
+ Ops.push_back(State.get(VPOp, 0));
- Value *V = Builder.CreateNAryOp(Opcode, Ops);
+ Value *V = Builder.CreateNAryOp(Opcode, Ops);
- if (auto *VecOp = dyn_cast<Instruction>(V))
- setFlags(VecOp);
-
- // Use this vector value for all users of the original instruction.
- State.set(this, V, Part);
- State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
- }
+ if (auto *VecOp = dyn_cast<Instruction>(V))
+ setFlags(VecOp);
+ // Use this vector value for all users of the original instruction.
+ State.set(this, V, 0);
+ State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
break;
}
case Instruction::Freeze: {
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Op = State.get(getOperand(0), Part);
+ Value *Op = State.get(getOperand(0), 0);
- Value *Freeze = Builder.CreateFreeze(Op);
- State.set(this, Freeze, Part);
- }
+ Value *Freeze = Builder.CreateFreeze(Op);
+ State.set(this, Freeze, 0);
break;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
bool FCmp = Opcode == Instruction::FCmp;
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
- Value *C = nullptr;
- if (FCmp) {
- // Propagate fast math flags.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
- Builder.setFastMathFlags(I->getFastMathFlags());
- C = Builder.CreateFCmp(getPredicate(), A, B);
- } else {
- C = Builder.CreateICmp(getPredicate(), A, B);
- }
- State.set(this, C, Part);
- State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ Value *A = State.get(getOperand(0), 0);
+ Value *B = State.get(getOperand(1), 0);
+ Value *C = nullptr;
+ if (FCmp) {
+ // Propagate fast math flags.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
+ Builder.setFastMathFlags(I->getFastMathFlags());
+ C = Builder.CreateFCmp(getPredicate(), A, B);
+ } else {
+ C = Builder.CreateICmp(getPredicate(), A, B);
}
-
+ State.set(this, C, 0);
+ State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
break;
}
default:
@@ -1234,11 +1195,9 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#if !defined(NDEBUG)
// Verify that VPlan type inference results agree with the type of the
// generated values.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
- State.VF) == State.get(this, Part)->getType() &&
- "inferred type and type from generated instructions do not match");
- }
+ assert(VectorType::get(State.TypeAnalysis.inferScalarType(this), State.VF) ==
+ State.get(this, 0)->getType() &&
+ "inferred type and type from generated instructions do not match");
#endif
}
@@ -1323,8 +1282,6 @@ void VPWidenEVLRecipe::execute(VPTransformState &State) {
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
State.setDebugLocFrom(getDebugLoc());
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
"VPWidenEVLRecipe should not be used for scalars");
@@ -1381,17 +1338,10 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
assert(State.VF.isVector() && "Not vectorizing?");
Type *DestTy = VectorType::get(getResultType(), State.VF);
VPValue *Op = getOperand(0);
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- if (Part > 0 && Op->isLiveIn()) {
- // FIXME: Remove once explicit unrolling is implemented using VPlan.
- State.set(this, State.get(this, 0), Part);
- continue;
- }
- Value *A = State.get(Op, Part);
- Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
- State.set(this, Cast, Part);
- State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
- }
+ Value *A = State.get(Op, 0);
+ Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
+ State.set(this, Cast, 0);
+ State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1553,19 +1503,14 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
VecInd->setDebugLoc(EntryVal->getDebugLoc());
- Instruction *LastInduction = VecInd;
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- State.set(this, LastInduction, Part);
+ State.set(this, VecInd, 0);
- if (isa<TruncInst>(EntryVal))
- State.addMetadata(LastInduction, EntryVal);
-
- LastInduction = cast<Instruction>(
- Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
- LastInduction->setDebugLoc(EntryVal->getDebugLoc());
- }
+ Instruction *LastInduction = cast<Instruction>(Builder.CreateBinOp(
+ AddOp, VecInd, SplatVF, "vec.ind.next", EntryVal->getDebugLoc()));
+ if (isa<TruncInst>(EntryVal))
+ State.addMetadata(LastInduction, EntryVal);
+ LastInduction->setDebugLoc(EntryVal->getDebugLoc());
- LastInduction->setName("vec.ind.next");
VecInd->addIncoming(SteppedStart, VectorPH);
// Add induction update using an incorrect block temporarily. The phi node
// will be fixed after VPlan execution. Note that at this point the latch
@@ -1667,49 +1612,42 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
}
- unsigned StartPart = 0;
- unsigned EndPart = 1;
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
if (State.Instance) {
- StartPart = State.Instance->Part;
- EndPart = StartPart + 1;
StartLane = State.Instance->Lane.getKnownLane();
EndLane = StartLane + 1;
}
- // TODO: Remove loop after VPlan-based unroller lands.
- for (unsigned Part = StartPart; Part < EndPart; ++Part) {
- Value *StartIdx0 =
- createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
-
- if (!FirstLaneOnly && State.VF.isScalable()) {
- auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
- auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
- if (BaseIVTy->isFloatingPointTy())
- InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
- auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
- auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
- State.set(this, Add, Part);
- // It's useful to record the lane values too for the known minimum number
- // of elements so we do those below. This improves the code quality when
- // trying to extract the first element, for example.
- }
+ Value *StartIdx0 =
+ createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
+ if (!FirstLaneOnly && State.VF.isScalable()) {
+ auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
+ auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
if (BaseIVTy->isFloatingPointTy())
- StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
-
- for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
- Value *StartIdx = Builder.CreateBinOp(
- AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
- // The step returned by `createStepForVF` is a runtime-evaluated value
- // when VF is scalable. Otherwise, it should be folded into a Constant.
- assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
- "Expected StartIdx to be folded to a constant when VF is not "
- "scalable");
- auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
- auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
- State.set(this, Add, VPIteration(Part, Lane));
- }
+ InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
+ auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
+ auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
+ State.set(this, Add, 0);
+ // It's useful to record the lane values too for the known minimum number
+ // of elements so we do those below. This improves the code quality when
+ // trying to extract the first element, for example.
+ }
+
+ if (BaseIVTy->isFloatingPointTy())
+ StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
+
+ for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
+ Value *StartIdx = Builder.CreateBinOp(
+ AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
+ // The step returned by `createStepForVF` is a runtime-evaluated value
+ // when VF is scalable. Otherwise, it should be folded into a Constant.
+ assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
+ "Expected StartIdx to be folded to a constant when VF is not "
+ "scalable");
+ auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
+ auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
+ State.set(this, Add, VPIteration(0, Lane));
}
}
@@ -1752,46 +1690,37 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
auto *NewGEP =
State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
ArrayRef(Ops).drop_front(), "", isInBounds());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
- State.set(this, EntryPart, Part);
- State.addMetadata(EntryPart, GEP);
- }
+ Value *Splat = State.Builder.CreateVectorSplat(State.VF, NewGEP);
+ State.set(this, Splat, 0);
+ State.addMetadata(Splat, GEP);
} else {
// If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr = isPointerLoopInvariant()
- ? State.get(getOperand(0), VPIteration(0, 0))
- : State.get(getOperand(0), Part);
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
- VPValue *Operand = getOperand(I);
- if (isIndexLoopInvariant(I - 1))
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
- else
- Indices.push_back(State.get(Operand, Part));
- }
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
- Indices, "", isInBounds());
- assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- State.set(this, NewGEP, Part);
- State.addMetadata(NewGEP, GEP);
+ // produce a vector of pointers unless VF is scalar.
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = isPointerLoopInvariant()
+ ? State.get(getOperand(0), VPIteration(0, 0))
+ : State.get(getOperand(0), 0);
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
+ VPValue *Operand = getOperand(I);
+ if (isIndexLoopInvariant(I - 1))
+ Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ else
+ Indices.push_back(State.get(Operand, 0));
}
+
+ // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+ // but it should be a vector, otherwise.
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
+ Indices, "", isInBounds());
+ assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ State.set(this, NewGEP, 0);
+ State.addMetadata(NewGEP, GEP);
}
}
@@ -1815,40 +1744,36 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // Calculate the pointer for the specific unroll-part.
- Value *PartPtr = nullptr;
- // Use i32 for the gep index type when the value is constant,
- // or query DataLayout for a more suitable index type otherwise.
- const DataLayout &DL =
- Builder.GetInsertBlock()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
- ? DL.getIndexType(IndexedTy->getPointerTo())
- : Builder.getInt32Ty();
- Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
- bool InBounds = isInBounds();
- if (IsReverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -CurrentPart * RunTimeVF
- Value *NumElt = Builder.CreateMul(
- ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
- } else {
- Value *Increment =
- createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
- }
-
- State.set(this, PartPtr, Part, /*IsScalar*/ true);
+ // Use i32 for the gep index type when the value is constant,
+ // or query DataLayout for a more suitable index type otherwise.
+ const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+ Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
+ ? DL.getIndexType(IndexedTy->getPointerTo())
+ : Builder.getInt32Ty();
+ Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+ bool InBounds = isInBounds();
+
+ Value *ResultPtr = nullptr;
+ if (IsReverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+ // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+ Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
+ // NumElt = -CurrentPart * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane =
+ Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+ ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
+ } else {
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+ ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
}
+
+ State.set(this, ResultPtr, 0, /*IsScalar*/ true);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1883,27 +1808,22 @@ void VPBlendRecipe::execute(VPTransformState &State) {
// In0)))
// Note that Mask0 is never used: lanes for which no path reaches this phi and
// are essentially undef are taken from In0.
- VectorParts Entry(State.UF);
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
+ Value *Result = nullptr;
for (unsigned In = 0; In < NumIncoming; ++In) {
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // We might have single edge PHIs (blocks) - use an identity
- // 'select' for the first PHI operand.
- Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
- if (In == 0)
- Entry[Part] = In0; // Initialize with the first incoming value.
- else {
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
- Entry[Part] =
- State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
- }
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
+ Value *In0 = State.get(getIncomingValue(In), 0, OnlyFirstLaneUsed);
+ if (In == 0)
+ Result = In0; // Initialize with the first incoming value.
+ else {
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Value *Cond = State.get(getMask(In), 0, OnlyFirstLaneUsed);
+ Result = State.Builder.CreateSelect(Cond, In0, Result, "predphi");
}
}
-
- for (unsigned Part = 0; Part < State.UF; ++Part)
- State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
+ State.set(this, Result, 0, OnlyFirstLaneUsed);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1937,57 +1857,51 @@ void VPReductionRecipe::execute(VPTransformState &State) {
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *NewVecOp = State.get(getVecOp(), Part);
- if (VPValue *Cond = getCondOp()) {
- Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
- VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
- Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
-
- Value *Start;
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))
- Start = RdxDesc.getRecurrenceStartValue();
- else
- Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
- RdxDesc.getFastMathFlags());
- if (State.VF.isVector())
- Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(),
- Start);
-
- Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
- NewVecOp = Select;
- }
- Value *NewRed;
- Value *NextInChain;
- if (IsOrdered) {
- if (State.VF.isVector())
- NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
- PrevInChain);
- else
- NewRed = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
- NewVecOp);
- PrevInChain = NewRed;
- NextInChain = NewRed;
- } else {
- PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
- NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
- NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
- NewRed, PrevInChain);
- else
- NextInChain = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed,
- PrevInChain);
- }
- State.set(this, NextInChain, Part, /*IsScalar*/ true);
+ Value *NewVecOp = State.get(getVecOp(), 0);
+ if (VPValue *Cond = getCondOp()) {
+ Value *NewCond = State.get(Cond, 0, State.VF.isScalar());
+ VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
+ Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
+
+ Value *Start;
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind))
+ Start = RdxDesc.getRecurrenceStartValue();
+ else
+ Start = llvm::getRecurrenceIdentity(Kind, ElementTy,
+ RdxDesc.getFastMathFlags());
+ if (State.VF.isVector())
+ Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
+
+ Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
+ NewVecOp = Select;
+ }
+ Value *NewRed;
+ Value *NextInChain;
+ if (IsOrdered) {
+ if (State.VF.isVector())
+ NewRed =
+ createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
+ else
+ NewRed = State.Builder.CreateBinOp(
+ (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
+ NewVecOp);
+ PrevInChain = NewRed;
+ NextInChain = NewRed;
+ } else {
+ PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
+ NewRed = createReduction(State.Builder, RdxDesc, NewVecOp);
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
+ NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
+ NewRed, PrevInChain);
+ else
+ NextInChain = State.Builder.CreateBinOp(
+ (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
}
+ State.set(this, NextInChain, 0, /*IsScalar*/ true);
}
void VPReductionEVLRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
- assert(State.UF == 1 &&
- "Expected only UF == 1 when vectorizing with explicit vector length.");
auto &Builder = State.Builder;
// Propagate the fast-math flags carried by the underlying instruction.
@@ -2112,18 +2026,7 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
-/// if it is either defined outside the vector region or its operand is known to
-/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
-/// TODO: Uniformity should be associated with a VPValue and there should be a
-/// generic way to check.
-static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
- return C->isDefinedOutsideLoopRegions() ||
- isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
- isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
-}
-
-Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
+Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
switch (Opcode) {
@@ -2131,7 +2034,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
case Instruction::ZExt:
case Instruction::Trunc: {
// Note: SExt/ZExt not used yet.
- Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
+ Value *Op = State.get(getOperand(0), VPIteration(0, 0));
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
}
default:
@@ -2140,17 +2043,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
}
void VPScalarCastRecipe ::execute(VPTransformState &State) {
- bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
- for (unsigned Part = 0; Part != State.UF; ++Part) {
- Value *Res;
- // Only generate a single instance, if the recipe is uniform across UFs and
- // VFs.
- if (Part > 0 && IsUniformAcrossVFsAndUFs)
- Res = State.get(this, VPIteration(0, 0));
- else
- Res = generate(State, Part);
- State.set(this, Res, VPIteration(Part, 0));
- }
+ State.set(this, generate(State), VPIteration(0, 0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2167,13 +2060,12 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Branch on Mask works only on single instance.");
- unsigned Part = State.Instance->Part;
unsigned Lane = State.Instance->Lane.getKnownLane();
Value *ConditionBit = nullptr;
VPValue *BlockInMask = getMask();
if (BlockInMask) {
- ConditionBit = State.get(BlockInMask, Part);
+ ConditionBit = State.get(BlockInMask, 0);
if (ConditionBit->getType()->isVectorTy())
ConditionBit = State.Builder.CreateExtractElement(
ConditionBit, State.Builder.getInt32(Lane));
@@ -2206,20 +2098,19 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
// needed. In this case the recipe of the predicated instruction is marked to
// also do that packing, thereby "hoisting" the insert-element sequence.
// Otherwise, a phi node for the scalar value is needed.
- unsigned Part = State.Instance->Part;
- if (State.hasVectorValue(getOperand(0), Part)) {
- Value *VectorValue = State.get(getOperand(0), Part);
+ if (State.hasVectorValue(getOperand(0), 0)) {
+ Value *VectorValue = State.get(getOperand(0), 0);
InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
- if (State.hasVectorValue(this, Part))
- State.reset(this, VPhi, Part);
+ if (State.hasVectorValue(this, 0))
+ State.reset(this, VPhi, 0);
else
- State.set(this, VPhi, Part);
+ State.set(this, VPhi, 0);
// NOTE: Currently we need to update the value of the operand, so the next
// predicated iteration inserts its generated value in the correct vector.
- State.reset(getOperand(0), VPhi, Part);
+ State.reset(getOperand(0), VPhi, 0);
} else {
Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
@@ -2295,34 +2186,32 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *NewLI;
- Value *Mask = nullptr;
- if (auto *VPMask = getMask()) {
- // Mask reversal is only needed for non-all-one (null) masks, as reverse
- // of a null all-one mask is a null mask.
- Mask = State.get(VPMask, Part);
- if (isReverse())
- Mask = Builder.CreateVectorReverse(Mask, "reverse");
- }
+ Value *Mask = nullptr;
+ if (auto *VPMask = getMask()) {
+ // Mask reversal is only needed for non-all-one (null) masks, as reverse
+ // of a null all-one mask is a null mask.
+ Mask = State.get(VPMask, 0);
+ if (isReverse())
+ Mask = Builder.CreateVectorReverse(Mask, "reverse");
+ }
- Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateGather);
- if (CreateGather) {
- NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
- "wide.masked.gather");
- } else if (Mask) {
- NewLI = Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
- PoisonValue::get(DataTy),
- "wide.masked.load");
- } else {
- NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
- }
- // Add metadata to the load, but setVectorValue to the reverse shuffle.
- State.addMetadata(NewLI, LI);
- if (Reverse)
- NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
- State.set(this, NewLI, Part);
+ Value *Addr = State.get(getAddr(), 0, /*IsScalar*/ !CreateGather);
+ Value *NewLI;
+ if (CreateGather) {
+ NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
+ "wide.masked.gather");
+ } else if (Mask) {
+ NewLI =
+ Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
+ PoisonValue::get(DataTy), "wide.masked.load");
+ } else {
+ NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
}
+ // Add metadata to the load, but setVectorValue to the reverse shuffle.
+ State.addMetadata(NewLI, LI);
+ if (Reverse)
+ NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
+ State.set(this, NewLI, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2347,8 +2236,6 @@ static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
}
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
auto *LI = cast<LoadInst>(&Ingredient);
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
@@ -2409,34 +2296,32 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Instruction *NewSI = nullptr;
- Value *Mask = nullptr;
- if (auto *VPMask = getMask()) {
- // Mask reversal is only needed for non-all-one (null) masks, as reverse
- // of a null all-one mask is a null mask.
- Mask = State.get(VPMask, Part);
- if (isReverse())
- Mask = Builder.CreateVectorReverse(Mask, "reverse");
- }
-
- Value *StoredVal = State.get(StoredVPValue, Part);
- if (isReverse()) {
- // If we store to reverse consecutive memory locations, then we need
- // to reverse the order of elements in the stored value.
- StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't call resetVectorValue(StoredVal).
- }
- Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter);
- if (CreateScatter)
- NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
- else if (Mask)
- NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
- State.addMetadata(NewSI, SI);
- }
+ Value *Mask = nullptr;
+ if (auto *VPMask = getMask()) {
+ // Mask reversal is only needed for non-all-one (null) masks, as reverse
+ // of a null all-one mask is a null mask.
+ Mask = State.get(VPMask, 0);
+ if (isReverse())
+ Mask = Builder.CreateVectorReverse(Mask, "reverse");
+ }
+
+ Value *StoredVal = State.get(StoredVPValue, 0);
+ if (isReverse()) {
+ // If we store to reverse consecutive memory locations, then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+ }
+ Value *Addr = State.get(getAddr(), 0, /*IsScalar*/ !CreateScatter);
+ Instruction *NewSI = nullptr;
+ if (CreateScatter)
+ NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
+ else if (Mask)
+ NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
+ State.addMetadata(NewSI, SI);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2448,8 +2333,6 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
- assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
- "explicit vector length.");
auto *SI = cast<StoreInst>(&Ingredient);
VPValue *StoredValue = getStoredValue();
@@ -2597,7 +2480,6 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
// Prepare for the new pointers.
- SmallVector<Value *, 2> AddrParts;
unsigned Index = Group->getIndex(Instr);
// TODO: extend the masked interleaved-group support to reversed access.
@@ -2624,41 +2506,38 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
Idx = State.Builder.getInt32(-Index);
VPValue *Addr = getAddr();
- for (unsigned Part = 0; Part < State.UF; Part++) {
- Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
- if (auto *I = dyn_cast<Instruction>(AddrPart))
- State.setDebugLocFrom(I->getDebugLoc());
-
- // Notice current instruction could be any index. Need to adjust the address
- // to the member of index 0.
- //
- // E.g. a = A[i+1]; // Member of index 1 (Current instruction)
- // b = A[i]; // Member of index 0
- // Current pointer is pointed to A[i+1], adjust it to A[i].
- //
- // E.g. A[i+1] = a; // Member of index 1
- // A[i] = b; // Member of index 0
- // A[i+2] = c; // Member of index 2 (Current instruction)
- // Current pointer is pointed to A[i+2], adjust it to A[i].
-
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
- InBounds = gep->isInBounds();
- AddrPart = State.Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
- AddrParts.push_back(AddrPart);
- }
+ Value *ResAddr = State.get(Addr, VPIteration(0, 0));
+ if (auto *I = dyn_cast<Instruction>(ResAddr))
+ State.setDebugLocFrom(I->getDebugLoc());
+
+ // Notice current instruction could be any index. Need to adjust the address
+ // to the member of index 0.
+ //
+ // E.g. a = A[i+1]; // Member of index 1 (Current instruction)
+ // b = A[i]; // Member of index 0
+ // Current pointer is pointed to A[i+1], adjust it to A[i].
+ //
+ // E.g. A[i+1] = a; // Member of index 1
+ // A[i] = b; // Member of index 0
+ // A[i+2] = c; // Member of index 2 (Current instruction)
+ // Current pointer is pointed to A[i+2], adjust it to A[i].
+
+ bool InBounds = false;
+ if (auto *gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
+ InBounds = gep->isInBounds();
+ ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Idx, "", InBounds);
State.setDebugLocFrom(Instr->getDebugLoc());
Value *PoisonVec = PoisonValue::get(VecTy);
- auto CreateGroupMask = [&BlockInMask, &State, &InterleaveFactor](
- unsigned Part, Value *MaskForGaps) -> Value * {
+ auto CreateGroupMask = [&BlockInMask, &State,
+ &InterleaveFactor](Value *MaskForGaps) -> Value * {
if (State.VF.isScalable()) {
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
assert(InterleaveFactor == 2 &&
"Unsupported deinterleave factor for scalable vectors");
- auto *BlockInMaskPart = State.get(BlockInMask, Part);
- SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
+ auto *ResBlockInMask = State.get(BlockInMask, 0);
+ SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
State.VF.getKnownMinValue() * 2, true);
return State.Builder.CreateIntrinsic(
@@ -2669,9 +2548,9 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
if (!BlockInMask)
return MaskForGaps;
- Value *BlockInMaskPart = State.get(BlockInMask, Part);
+ Value *ResBlockInMask = State.get(BlockInMask, 0);
Value *ShuffledMask = State.Builder.CreateShuffleVector(
- BlockInMaskPart,
+ ResBlockInMask,
createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
"interleaved.mask");
return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
@@ -2689,21 +2568,16 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(MaskForGaps && "Mask for Gaps is required but it is null");
}
- // For each unroll part, create a wide load for the group.
- SmallVector<Value *, 2> NewLoads;
- for (unsigned Part = 0; Part < State.UF; Part++) {
- Instruction *NewLoad;
- if (BlockInMask || MaskForGaps) {
- Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
- NewLoad = State.Builder.CreateMaskedLoad(VecTy, AddrParts[Part],
- Group->getAlign(), GroupMask,
- PoisonVec, "wide.masked.vec");
- } else
- NewLoad = State.Builder.CreateAlignedLoad(
- VecTy, AddrParts[Part], Group->getAlign(), "wide.vec");
- Group->addMetadata(NewLoad);
- NewLoads.push_back(NewLoad);
- }
+ Instruction *NewLoad;
+ if (BlockInMask || MaskForGaps) {
+ Value *GroupMask = CreateGroupMask(MaskForGaps);
+ NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
+ Group->getAlign(), GroupMask,
+ PoisonVec, "wide.masked.vec");
+ } else
+ NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
+ Group->getAlign(), "wide.vec");
+ Group->addMetadata(NewLoad);
ArrayRef<VPValue *> VPDefs = definedValues();
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
@@ -2711,34 +2585,31 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(InterleaveFactor == 2 &&
"Unsupported deinterleave factor for scalable vectors");
- for (unsigned Part = 0; Part < State.UF; ++Part) {
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
// so must use intrinsics to deinterleave.
- Value *DI = State.Builder.CreateIntrinsic(
- Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part],
- /*FMFSource=*/nullptr, "strided.vec");
- unsigned J = 0;
- for (unsigned I = 0; I < InterleaveFactor; ++I) {
- Instruction *Member = Group->getMember(I);
-
- if (!Member)
- continue;
-
- Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
- // If this member has different type, cast the result type.
- if (Member->getType() != ScalarTy) {
- VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
- StridedVec =
- createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
- }
-
- if (Group->isReverse())
- StridedVec =
- State.Builder.CreateVectorReverse(StridedVec, "reverse");
-
- State.set(VPDefs[J], StridedVec, Part);
- ++J;
+ Value *DI = State.Builder.CreateIntrinsic(
+ Intrinsic::vector_deinterleave2, VecTy, NewLoad,
+ /*FMFSource=*/nullptr, "strided.vec");
+ unsigned J = 0;
+ for (unsigned I = 0; I < InterleaveFactor; ++I) {
+ Instruction *Member = Group->getMember(I);
+
+ if (!Member)
+ continue;
+
+ Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
+ // If this member has different type, cast the result type.
+ if (Member->getType() != ScalarTy) {
+ VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
+ StridedVec =
+ createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
}
+
+ if (Group->isReverse())
+ StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
+
+ State.set(VPDefs[J], StridedVec, 0);
+ ++J;
}
return;
@@ -2756,23 +2627,21 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
auto StrideMask =
createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
- for (unsigned Part = 0; Part < State.UF; Part++) {
- Value *StridedVec = State.Builder.CreateShuffleVector(
- NewLoads[Part], StrideMask, "strided.vec");
-
- // If this member has different type, cast the result type.
- if (Member->getType() != ScalarTy) {
- assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
- VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
- StridedVec =
- createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
- }
+ Value *StridedVec =
+ State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
+
+ // If this member has different type, cast the result type.
+ if (Member->getType() != ScalarTy) {
+ assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
+ VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
+ StridedVec =
+ createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
+ }
- if (Group->isReverse())
- StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
+ if (Group->isReverse())
+ StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
- State.set(VPDefs[J], StridedVec, Part);
- }
+ State.set(VPDefs[J], StridedVec, 0);
++J;
}
return;
@@ -2787,50 +2656,47 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
assert((!MaskForGaps || !State.VF.isScalable()) &&
"masking gaps for scalable vectors is not yet supported.");
ArrayRef<VPValue *> StoredValues = getStoredValues();
- for (unsigned Part = 0; Part < State.UF; Part++) {
- // Collect the stored vector from each member.
- SmallVector<Value *, 4> StoredVecs;
- unsigned StoredIdx = 0;
- for (unsigned i = 0; i < InterleaveFactor; i++) {
- assert((Group->getMember(i) || MaskForGaps) &&
- "Fail to get a member from an interleaved store group");
- Instruction *Member = Group->getMember(i);
-
- // Skip the gaps in the group.
- if (!Member) {
- Value *Undef = PoisonValue::get(SubVT);
- StoredVecs.push_back(Undef);
- continue;
- }
+ // Collect the stored vector from each member.
+ SmallVector<Value *, 4> StoredVecs;
+ unsigned StoredIdx = 0;
+ for (unsigned i = 0; i < InterleaveFactor; i++) {
+ assert((Group->getMember(i) || MaskForGaps) &&
+ "Fail to get a member from an interleaved store group");
+ Instruction *Member = Group->getMember(i);
+
+ // Skip the gaps in the group.
+ if (!Member) {
+ Value *Undef = PoisonValue::get(SubVT);
+ StoredVecs.push_back(Undef);
+ continue;
+ }
- Value *StoredVec = State.get(StoredValues[StoredIdx], Part);
- ++StoredIdx;
+ Value *StoredVec = State.get(StoredValues[StoredIdx], 0);
+ ++StoredIdx;
- if (Group->isReverse())
- StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
+ if (Group->isReverse())
+ StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
- // If this member has different type, cast it to a unified type.
+ // If this member has different type, cast it to a unified type.
- if (StoredVec->getType() != SubVT)
- StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
+ if (StoredVec->getType() != SubVT)
+ StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
- StoredVecs.push_back(StoredVec);
- }
+ StoredVecs.push_back(StoredVec);
+ }
- // Interleave all the smaller vectors into one wider vector.
- Value *IVec =
- interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
- Instruction *NewStoreInstr;
- if (BlockInMask || MaskForGaps) {
- Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
- NewStoreInstr = State.Builder.CreateMaskedStore(
- IVec, AddrParts[Part], Group->getAlign(), GroupMask);
- } else
- NewStoreInstr = State.Builder.CreateAlignedStore(IVec, AddrParts[Part],
- Group->getAlign());
+ // Interleave all the smaller vectors into one wider vector.
+ Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
+ Instruction *NewStoreInstr;
+ if (BlockInMask || MaskForGaps) {
+ Value *GroupMask = CreateGroupMask(MaskForGaps);
+ NewStoreInstr = State.Builder.CreateMaskedStore(
+ IVec, ResAddr, Group->getAlign(), GroupMask);
+ } else
+ NewStoreInstr =
+ State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());
- Group->addMetadata(NewStoreInstr);
- }
+ Group->addMetadata(NewStoreInstr);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2866,14 +2732,13 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
- EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
+ PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");
+ Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- EntryPart->addIncoming(Start, VectorPH);
- EntryPart->setDebugLoc(getDebugLoc());
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(this, EntryPart, Part, /*IsScalar*/ true);
+ Phi->addIncoming(Start, VectorPH);
+ Phi->setDebugLoc(getDebugLoc());
+ State.set(this, Phi, 0, /*IsScalar*/ true);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2964,28 +2829,25 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
NewPointerPhi->addIncoming(InductionGEP, VectorPH);
}
- // Create UF many actual address geps that use the pointer
- // phi as base and a vectorized version of the step value
- // (<step*0, ..., step*N>) as offset.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Type *VecPhiType = VectorType::get(PhiType, State.VF);
- Value *StartOffsetScalar = State.Builder.CreateMul(
- RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
- Value *StartOffset =
- State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
- // Create a vector of consecutive numbers from zero to VF.
- StartOffset = State.Builder.CreateAdd(
- StartOffset, State.Builder.CreateStepVector(VecPhiType));
-
- assert(ScalarStepValue == State.get(getOperand(1), VPIteration(Part, 0)) &&
- "scalar step must be the same across all parts");
- Value *GEP = State.Builder.CreateGEP(
- State.Builder.getInt8Ty(), NewPointerPhi,
- State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
- State.VF, ScalarStepValue)),
- "vector.gep");
- State.set(this, GEP, Part);
- }
+ // Create actual address geps that use the pointer phi as base and a
+ // vectorized version of the step value (<step*0, ..., step*N>) as offset.
+ Type *VecPhiType = VectorType::get(PhiType, State.VF);
+ Value *StartOffsetScalar = State.Builder.CreateMul(
+ RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
+ Value *StartOffset =
+ State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
+ // Create a vector of consecutive numbers from zero to VF.
+ StartOffset = State.Builder.CreateAdd(
+ StartOffset, State.Builder.CreateStepVector(VecPhiType));
+
+ assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, 0)) &&
+ "scalar step must be the same across all parts");
+ Value *GEP = State.Builder.CreateGEP(
+ State.Builder.getInt8Ty(), NewPointerPhi,
+ State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
+ State.VF, ScalarStepValue)),
+ "vector.gep");
+ State.set(this, GEP, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3017,8 +2879,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.ExpandedSCEVs.contains(Expr) &&
"Same SCEV expanded multiple times");
State.ExpandedSCEVs[Expr] = Res;
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(this, Res, {Part, 0});
+ State.set(this, Res, {0, 0});
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3038,16 +2899,14 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *VStart = VF.isScalar()
? CanonicalIV
: Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
- Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
- if (VF.isVector()) {
- VStep = Builder.CreateVectorSplat(VF, VStep);
- VStep =
- Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
- }
- Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
- State.set(this, CanonicalVectorIV, Part);
+ Value *VStep = createStepForVF(Builder, STy, VF, getUnrollPart(*this));
+ if (VF.isVector()) {
+ VStep = Builder.CreateVectorSplat(VF, VStep);
+ VStep =
+ Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
}
+ Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
+ State.set(this, CanonicalVectorIV, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3082,10 +2941,10 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
}
// Create a phi node for the new recurrence.
- PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
- EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
- EntryPart->addIncoming(VectorInit, VectorPH);
- State.set(this, EntryPart, 0);
+ PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
+ Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
+ Phi->addIncoming(VectorInit, VectorPH);
+ State.set(this, Phi, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3117,12 +2976,9 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
BasicBlock *HeaderBB = State.CFG.PrevBB;
assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
"recipe must be in the vector loop header");
- unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
- EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
- State.set(this, EntryPart, Part, IsInLoop);
- }
+ auto *Phi = PHINode::Create(VecTy, 2, "vec.phi");
+ Phi->insertBefore(HeaderBB->getFirstInsertionPt());
+ State.set(this, Phi, 0, IsInLoop);
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
@@ -3160,13 +3016,9 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
}
}
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *EntryPart = State.get(this, Part, IsInLoop);
- // Make sure to add the reduction start value only to the
- // first unroll part.
- Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
- cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
- }
+ Phi = cast<PHINode>(State.get(this, 0, IsInLoop));
+ Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
+ Phi->addIncoming(StartVal, VectorPH);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3215,14 +3067,12 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
- Value *StartMask = State.get(getOperand(0), Part);
- PHINode *EntryPart =
- State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
- EntryPart->addIncoming(StartMask, VectorPH);
- EntryPart->setDebugLoc(getDebugLoc());
- State.set(this, EntryPart, Part);
- }
+ Value *StartMask = State.get(getOperand(0), 0);
+ PHINode *Phi =
+ State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
+ Phi->addIncoming(StartMask, VectorPH);
+ Phi->setDebugLoc(getDebugLoc());
+ State.set(this, Phi, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3238,13 +3088,11 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
Value *Start = State.get(getOperand(0), VPIteration(0, 0));
- PHINode *EntryPart =
- State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
- EntryPart->addIncoming(Start, VectorPH);
- EntryPart->setDebugLoc(getDebugLoc());
- State.set(this, EntryPart, 0, /*IsScalar=*/true);
+ PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
+ Phi->addIncoming(Start, VectorPH);
+ Phi->setDebugLoc(getDebugLoc());
+ State.set(this, Phi, 0, /*IsScalar=*/true);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
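
For readers unfamiliar with the shuffle-mask helpers that appear in the interleave hunks above (createStrideMask and createReplicatedMask), the following standalone C++ sketch reproduces the mask shapes those helpers compute for a small fixed-width example. It is an illustrative re-derivation, not LLVM code; the function names here merely mirror the ones referenced in the diff.

// Standalone sketch (not LLVM code): the index masks used when
// deinterleaving a wide load and when widening a block-in mask for an
// interleaved group.
#include <cstdio>
#include <vector>

// Mirrors the shape produced by createStrideMask(Start, Stride, VF):
// {Start, Start+Stride, ..., Start+(VF-1)*Stride}.
static std::vector<int> strideMask(int Start, int Stride, int VF) {
  std::vector<int> Mask;
  for (int I = 0; I < VF; ++I)
    Mask.push_back(Start + I * Stride);
  return Mask;
}

// Mirrors the shape produced by createReplicatedMask(ReplicationFactor, VF):
// each lane index repeated ReplicationFactor times.
static std::vector<int> replicatedMask(int ReplicationFactor, int VF) {
  std::vector<int> Mask;
  for (int I = 0; I < VF; ++I)
    for (int R = 0; R < ReplicationFactor; ++R)
      Mask.push_back(I);
  return Mask;
}

int main() {
  // Interleave group with factor 2 and VF = 4: member 0 is extracted from
  // the wide load with indices {0,2,4,6}, member 1 with {1,3,5,7}.
  for (int Member = 0; Member < 2; ++Member) {
    printf("stride mask, member %d:", Member);
    for (int Idx : strideMask(Member, 2, 4))
      printf(" %d", Idx);
    printf("\n");
  }
  // A 4-lane block-in mask is widened to the 8-lane group by repeating each
  // lane bit once per interleave-factor slot.
  printf("replicated mask:");
  for (int Idx : replicatedMask(2, 4))
    printf(" %d", Idx);
  printf("\n");
  return 0;
}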