[llvm] [LV, VP]VP intrinsics support for the Loop Vectorizer + adding new tail-folding mode using EVL. (PR #76172)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 7 14:08:02 PST 2024
================
@@ -1201,20 +1240,75 @@ void VPlanTransforms::addActiveLaneMask(
// Walk users of WideCanonicalIV and replace all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
// active-lane-mask.
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
- for (VPUser *U : SmallVector<VPUser *>(WideCanonicalIV->users())) {
- auto *CompareToReplace = dyn_cast<VPInstruction>(U);
- if (!CompareToReplace ||
- CompareToReplace->getOpcode() != Instruction::ICmp ||
- CompareToReplace->getPredicate() != CmpInst::ICMP_ULE ||
- CompareToReplace->getOperand(1) != BTC)
- continue;
+ replaceHeaderPredicateWithIdiom(Plan, *LaneMask);
+}
- assert(CompareToReplace->getOperand(0) == WideCanonicalIV &&
- "WidenCanonicalIV must be the first operand of the compare");
- CompareToReplace->replaceAllUsesWith(LaneMask);
- CompareToReplace->eraseFromParent();
+/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
+/// replace all uses except the canonical IV increment of
+/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
+/// is used only for loop iterations counting after this transformation.
+///
+/// The function uses the following definitions:
+/// %StartV is the canonical induction start value.
+///
+/// The function adds the following recipes:
+///
+/// vector.ph:
+/// ...
+///
+/// vector.body:
+/// ...
+/// %P = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
+///                                          [ %NextEVL, %vector.body ]
+/// %EVL = EXPLICIT-VECTOR-LENGTH %P, original TC
+/// ...
+/// %NextEVL = EXPLICIT-VECTOR-LENGTH + %P, %EVL
+/// ...
+///
+void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
+ VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ VPValue *StartV = CanonicalIVPHI->getStartValue();
+
+ // Walk users of WideCanonicalIV and replace all compares of the form
+ // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
+ // all-true-mask.
+ Value *TrueMask =
+ ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext());
+ VPValue *VPTrueMask = Plan.getVPValueOrAddLiveIn(TrueMask);
+ replaceHeaderPredicateWithIdiom(Plan, *VPTrueMask, [](VPUser &U, unsigned) {
+ return isa<VPWidenMemoryInstructionRecipe>(U);
+ });
+ // Now create the ExplicitVectorLengthPhi recipe in the main loop.
+ auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
+ EVLPhi->insertAfter(CanonicalIVPHI);
+ auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
+ {EVLPhi, Plan.getTripCount()});
+ VPEVL->insertBefore(*Header, Header->getFirstNonPhi());
+
+ auto *CanonicalIVIncrement =
+ cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
+ VPSingleDefRecipe *OpVPEVL = VPEVL;
+ if (CanonicalIVPHI->getScalarType() !=
+ IntegerType::get(CanonicalIVPHI->getScalarType()->getContext(),
+ /*NumBits=*/32)) {
+ OpVPEVL = new VPScalarCastRecipe(Instruction::ZExt, OpVPEVL,
+ CanonicalIVPHI->getScalarType());
+ OpVPEVL->insertBefore(CanonicalIVIncrement);
----------------
ayalz wrote:
Why depend on CanonicalIVIncrement? The cast and the bump can simply be placed after VPEVL in the header.
Must the canonical IV have a type no smaller than i32?
Simply ask for the size of the canonical IV's scalar type, then check whether it is equal to that of i32?
https://github.com/llvm/llvm-project/pull/76172
More information about the llvm-commits
mailing list