[llvm] [LoopVectorize] Generate wide active lane masks (PR #147535)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 14 12:49:38 PDT 2025
================
@@ -1432,20 +1433,93 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
return SE.isKnownPredicate(CmpInst::ICMP_EQ, TripCount, C);
}
+static void extractFromWideActiveLaneMask(VPlan &Plan, ElementCount VF,
+ unsigned UF) {
+ VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
+ auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
+ VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock();
+ auto *Term = &ExitingVPBB->back();
+
+ VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ LLVMContext &Ctx = CanonicalIV->getScalarType()->getContext();
+ using namespace llvm::VPlanPatternMatch;
+
+ auto extractFromALM = [&](VPInstruction *ALM, VPInstruction *InsBefore,
+ SmallVectorImpl<VPValue *> &Extracts) {
+ VPBuilder Builder(InsBefore);
+ DebugLoc DL = ALM->getDebugLoc();
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<VPValue *> Ops;
+ Ops.append({ALM, Plan.getOrAddLiveIn(
+ ConstantInt::get(IntegerType::getInt64Ty(Ctx),
+ VF.getKnownMinValue() * Part))});
+ Extracts.push_back(
+ Builder.createNaryOp(VPInstruction::ExtractSubvector, Ops, DL));
+ }
+ };
+
+ // Create a list of each active lane mask phi, ordered by unroll part.
+ SmallVector<VPActiveLaneMaskPHIRecipe *> Phis(UF, nullptr);
+ for (VPRecipeBase &R : Header->phis())
+ if (auto *Phi = dyn_cast<VPActiveLaneMaskPHIRecipe>(&R))
+ Phis[Phi->getUnrollPart()] = Phi;
+
+ assert(all_of(Phis, [](VPActiveLaneMaskPHIRecipe *Phi) { return Phi; }) &&
+ "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
+
+ // When using wide lane masks, the return type of the get.active.lane.mask
+ // intrinsic is VF x UF (second operand).
+ VPValue *ALMMultiplier =
+ Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::getInt64Ty(Ctx), UF));
+ cast<VPInstruction>(Phis[0]->getStartValue())->setOperand(2, ALMMultiplier);
+ cast<VPInstruction>(Phis[0]->getBackedgeValue())
+ ->setOperand(2, ALMMultiplier);
+
+ // Create UF x extract vectors and insert into preheader.
+ SmallVector<VPValue *> EntryExtracts;
+ auto *EntryALM = cast<VPInstruction>(Phis[0]->getStartValue());
+ extractFromALM(EntryALM, cast<VPInstruction>(&EntryALM->getParent()->back()),
+ EntryExtracts);
+
+ // Create UF x extract vectors and insert before the loop compare & branch,
+ // updating the compare to use the first extract.
+ SmallVector<VPValue *> LoopExtracts;
+ auto *LoopALM = cast<VPInstruction>(Phis[0]->getBackedgeValue());
+ VPInstruction *Not = cast<VPInstruction>(Term->getOperand(0));
+ extractFromALM(LoopALM, Not, LoopExtracts);
+ Not->setOperand(0, LoopExtracts[0]);
+
+ // Update the incoming values of active lane mask phis.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Phis[Part]->setStartValue(EntryExtracts[Part]);
+ Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
+ }
+
+ return;
+}
+
/// Try to simplify the branch condition of \p Plan. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
-static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
- unsigned BestUF,
- PredicatedScalarEvolution &PSE) {
+static bool
+simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
+ unsigned BestUF,
+ PredicatedScalarEvolution &PSE,
+ bool DataAndControlFlowWithoutRuntimeCheck) {
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock();
auto *Term = &ExitingVPBB->back();
VPValue *Cond;
ScalarEvolution &SE = *PSE.getSE();
using namespace llvm::VPlanPatternMatch;
- if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
- match(Term, m_BranchOnCond(
- m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue()))))) {
+ auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
+ bool BranchALM = match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
+ m_VPValue(), m_VPValue(), m_VPValue()))));
+
+ if (BranchALM || match(Term, m_BranchOnCount(m_VPValue(), m_VPValue()))) {
+ if (BranchALM && DataAndControlFlowWithoutRuntimeCheck &&
+ EnableWideActiveLaneMask && BestVF.isVector() && BestUF > 1)
+ extractFromWideActiveLaneMask(Plan, BestVF, BestUF);
----------------
fhahn wrote:
Is there any benefit from having this here? It doesn't seem to fit, as it does not simplify the branch condition directly.
https://github.com/llvm/llvm-project/pull/147535
More information about the llvm-commits mailing list