[llvm] [VPlan] Add support for VPWidenIntOrFpInductionRecipe in predicated D… (PR #115274)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 23:21:50 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Shih-Po Hung (arcbbb)
Changes:
This patch adds support for VPWidenIntOrFpInductionRecipe in the predicated DataWithEVL vectorization mode. As an alternative approach to #82021, it lowers VPWidenIntOrFpInductionRecipe into a widened phi recipe and step recipes, computed using EVL in the EVL transformation phase.
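At the IR level, the widened IV phi is advanced by `Step * EVL` on every iteration: the scalar increment is broadcast with `llvm.experimental.vp.splat` and added to the phi with `llvm.vp.add`. Below is a simplified sketch of the pattern for an `i64` IV with step 1, distilled from the updated tests in this patch (value names shortened; `%mask` stands for the all-true `<vscale x 2 x i1>` constant that the tests spell out as a `shufflevector` splat):

```llvm
vector.body:
  %vec.ind = phi <vscale x 2 x i64> [ %induction, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
  ; ... EVL-predicated loop body uses %vec.ind ...
  %evl.i64 = zext i32 %evl to i64
  %inc = mul i64 1, %evl.i64                 ; Step * EVL
  %inc.splat = call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 %inc, <vscale x 2 x i1> %mask, i32 %evl)
  %vec.ind.next = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %vec.ind, <vscale x 2 x i64> %inc.splat, <vscale x 2 x i1> %mask, i32 %evl)
```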
---
Patch is 75.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115274.diff
12 Files Affected:
- (modified) llvm/lib/Analysis/VectorUtils.cpp (+8)
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+10)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+5-5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+5-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+131-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+2-6)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll (+135-6)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/only-compute-cost-for-vplan-vfs.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll (+257-39)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll (+50-7)
- (added) llvm/test/Transforms/LoopVectorize/RISCV/vplan-widen-iv-with-evl.ll (+147)
``````````diff
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cd5cf0443541fc..0c4df3a85c0947 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -115,6 +115,10 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
+ if (VPIntrinsic::isVPIntrinsic(ID) &&
+ (ScalarOpdIdx == VPIntrinsic::getVectorLengthParamPos(ID)))
+ return true;
+
switch (ID) {
case Intrinsic::abs:
case Intrinsic::ctlz:
@@ -127,6 +131,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
return (ScalarOpdIdx == 2);
+ case Intrinsic::experimental_vp_splat:
+ return (ScalarOpdIdx == 0);
default:
return false;
}
@@ -148,6 +154,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
return OpdIdx == 0;
case Intrinsic::powi:
return OpdIdx == -1 || OpdIdx == 1;
+ case Intrinsic::experimental_vp_splat:
+ return OpdIdx == -1;
default:
return OpdIdx == -1;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 6344bc4664d3b6..3016622cddd226 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1191,6 +1191,16 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
CostKind);
+ case Intrinsic::experimental_vp_splat: {
+ auto LT = getTypeLegalizationCost(RetTy);
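+ // An i1 splat has no single-instruction lowering; it is materialized as an
+ // integer splat followed by a compare with zero (vmv.v.x + vmsne.vi).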
+ if (RetTy->getScalarSizeInBits() == 1) {
+ return LT.first *
+ (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
+ LT.second, CostKind));
+ }
+ return LT.first *
+ getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
+ }
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c07af8519049c4..8442479229db3f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2939,8 +2939,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Fix widened non-induction PHIs by setting up the PHI operands.
- if (EnableVPlanNativePath)
- fixNonInductionPHIs(State);
+ fixNonInductionPHIs(State);
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 00ba2f49017899..583925e8d9bbbc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -285,15 +285,15 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Shuf;
};
- if (!hasScalarValue(Def, {0})) {
- assert(Def->isLiveIn() && "expected a live-in");
- Value *IRV = Def->getLiveInIRValue();
- Value *B = GetBroadcastInstrs(IRV);
+ Value *ScalarValue = hasScalarValue(Def, {0}) ? get(Def, VPLane(0)) : nullptr;
+ if (!ScalarValue || isa<Constant>(ScalarValue)) {
+ assert((ScalarValue || Def->isLiveIn()) && "expected a live-in");
+ Value *B = ScalarValue ? GetBroadcastInstrs(ScalarValue)
+ : GetBroadcastInstrs(Def->getLiveInIRValue());
set(Def, B);
return B;
}
- Value *ScalarValue = get(Def, VPLane(0));
// If we aren't vectorizing, we can just copy the scalar map values over
// to the vector map.
if (VF.isScalar()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6254ea15191819..0bfb29483282a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -648,7 +648,8 @@ bool VPInstruction::isVectorToScalar() const {
}
bool VPInstruction::isSingleScalar() const {
- return getOpcode() == VPInstruction::ResumePhi;
+ return getOpcode() == VPInstruction::ResumePhi ||
+ getOpcode() == VPInstruction::ExplicitVectorLength;
}
#if !defined(NDEBUG)
@@ -1022,6 +1023,8 @@ bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
// Vector predication intrinsics only demand the first lane of the last
// operand (the EVL operand).
+ if (VectorIntrinsicID == Intrinsic::experimental_vp_splat)
+ return Op == getOperand(0);
return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
Op == getOperand(getNumOperands() - 1);
}
@@ -2309,9 +2312,8 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
Value *VPScalarCastRecipe::generate(VPTransformState &State) {
- assert(vputils::onlyFirstLaneUsed(this) &&
- "Codegen only implemented for first lane.");
switch (Opcode) {
+ case Instruction::UIToFP:
case Instruction::SExt:
case Instruction::ZExt:
case Instruction::Trunc: {
@@ -3414,9 +3416,6 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
- assert(EnableVPlanNativePath &&
- "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ea8845eaa75d4d..ecd649b1048991 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1523,6 +1523,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
}
}
+/// This function adds (0 * Step, 1 * Step, 2 * Step, ...) to StartValue of
+/// an induction variable at the preheader.
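+/// For example, with StartValue = 10 and Step = 2, the preheader computes the
+/// initial vector IV <10, 12, 14, ...>.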
+static VPSingleDefRecipe *createStepVector(VPValue *StartValue, VPValue *Step,
+ Type *InductionTy,
+ const InductionDescriptor &ID,
+ VPBasicBlock *VectorPHVPBB,
+ DebugLoc DL) {
+ Type *IntTy = InductionTy->isIntegerTy()
+ ? InductionTy
+ : IntegerType::get(InductionTy->getContext(),
+ InductionTy->getScalarSizeInBits());
+ // Create a vector of consecutive numbers from 0 to VF-1.
+ VPSingleDefRecipe *InitVec =
+ new VPWidenIntrinsicRecipe(Intrinsic::stepvector, {}, IntTy, DL);
+ VectorPHVPBB->appendRecipe(InitVec);
+
+ if (InductionTy->isIntegerTy()) {
+ auto *Mul = new VPInstruction(Instruction::Mul, {InitVec, Step}, DL);
+ VectorPHVPBB->appendRecipe(Mul);
+ auto *SteppedStart =
+ new VPInstruction(Instruction::Add, {StartValue, Mul}, {}, "induction");
+ VectorPHVPBB->appendRecipe(SteppedStart);
+ return SteppedStart;
+ } else {
+ FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
+ InitVec = new VPWidenCastRecipe(Instruction::UIToFP, InitVec, InductionTy);
+ VectorPHVPBB->appendRecipe(InitVec);
+ auto *Mul = new VPInstruction(Instruction::FMul, {InitVec, Step}, FMF, DL);
+ VectorPHVPBB->appendRecipe(Mul);
+ Instruction::BinaryOps BinOp = ID.getInductionOpcode();
+ auto *SteppedStart =
+ new VPInstruction(BinOp, {StartValue, Mul}, FMF, DL, "induction");
+ VectorPHVPBB->appendRecipe(SteppedStart);
+ return SteppedStart;
+ }
+}
+
+/// Lower widened IV recipes into recipes with EVL.
+static void
+transformWidenIVRecipestoEVLRecipes(VPWidenIntOrFpInductionRecipe *WidenIV,
+ VPlan &Plan, VPValue *EVL) {
+ DebugLoc DL = WidenIV->getDebugLoc();
+ const InductionDescriptor &ID = WidenIV->getInductionDescriptor();
+ auto *CanonicalIVIncrement =
+ cast<VPInstruction>(Plan.getCanonicalIV()->getBackedgeValue());
+ VPBasicBlock *VectorPHVPBB = Plan.getVectorLoopRegion()->getPreheaderVPBB();
+ VPBasicBlock *ExitingVPBB =
+ Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+ VPValue *StartValue = WidenIV->getStartValue();
+ VPValue *Step = WidenIV->getStepValue();
+ if (TruncInst *I = WidenIV->getTruncInst()) {
+ Type *TruncTy = I->getType();
+ auto *R = new VPScalarCastRecipe(Instruction::Trunc, StartValue, TruncTy);
+ VectorPHVPBB->appendRecipe(R);
+ StartValue = R;
+ R = new VPScalarCastRecipe(Instruction::Trunc, Step, TruncTy);
+ VectorPHVPBB->appendRecipe(R);
+ Step = R;
+ }
+ Type *InductionTy = TypeInfo.inferScalarType(StartValue);
+ LLVMContext &Ctx = InductionTy->getContext();
+ VPValue *TrueMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
+
+ // Construct the initial value of the vector IV in the vector loop preheader.
+ VPSingleDefRecipe *SteppedStart =
+ createStepVector(StartValue, Step, InductionTy, ID, VectorPHVPBB, DL);
+
+ // Create the vector phi node for both int. and fp. induction variables
+ // and determine the kind of arithmetic we will perform.
+ auto *VecInd = new VPWidenPHIRecipe(WidenIV->getPHINode());
+ VecInd->insertBefore(WidenIV);
+ WidenIV->replaceAllUsesWith(VecInd);
+ Intrinsic::ID VPArithOp;
+ Instruction::BinaryOps MulOp;
+ if (InductionTy->isIntegerTy()) {
+ VPArithOp = Intrinsic::vp_add;
+ MulOp = Instruction::Mul;
+ } else {
+ VPArithOp = ID.getInductionOpcode() == Instruction::FAdd
+ ? Intrinsic::vp_fadd
+ : Intrinsic::vp_fsub;
+ MulOp = Instruction::FMul;
+ }
+
+ // Multiply the EVL by the step.
+ VPSingleDefRecipe *ScalarMul;
+ if (InductionTy->isFloatingPointTy()) {
+ FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
+ auto *CastEVL =
+ new VPScalarCastRecipe(Instruction::UIToFP, EVL, InductionTy);
+ CastEVL->insertBefore(CanonicalIVIncrement);
+ ScalarMul = new VPInstruction(MulOp, {Step, CastEVL}, FMF, DL);
+ } else {
+ unsigned InductionSz = InductionTy->getScalarSizeInBits();
+ unsigned EVLSz = TypeInfo.inferScalarType(EVL)->getScalarSizeInBits();
+ VPValue *CastEVL = EVL;
+ if (InductionSz != EVLSz) {
+ auto *R = new VPScalarCastRecipe(EVLSz > InductionSz ? Instruction::Trunc
+ : Instruction::ZExt,
+ EVL, InductionTy);
+ R->insertBefore(CanonicalIVIncrement);
+ CastEVL = R;
+ }
+ ScalarMul = new VPInstruction(MulOp, {Step, CastEVL}, DL);
+ }
+ ScalarMul->insertBefore(CanonicalIVIncrement);
+ // Create a vector splat to use in the induction update.
+ auto *SplatVF =
+ new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splat,
+ {ScalarMul, TrueMask, EVL}, InductionTy, DL);
+ SplatVF->insertBefore(CanonicalIVIncrement);
+ // TODO: We may need to add the step a number of times if UF > 1
+ auto *LastInduction = new VPWidenIntrinsicRecipe(
+ VPArithOp, {VecInd, SplatVF, TrueMask, EVL}, InductionTy, DL);
+ LastInduction->insertBefore(CanonicalIVIncrement);
+ VecInd->addIncoming(SteppedStart, VectorPHVPBB);
+ VecInd->addIncoming(LastInduction, ExitingVPBB);
+}
+
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
/// replace all uses of VPCanonicalIVPHIRecipe, except the canonical IV
/// increment, with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1569,8 +1689,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
// of the VF directly. At the moment, widened pointer inductions cannot be
// updated, so bail out if the plan contains any.
bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
- return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
- &Phi);
+ return isa<VPWidenPointerInductionRecipe>(&Phi);
});
if (ContainsWidenInductions)
return false;
@@ -1615,6 +1734,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
transformRecipestoEVLRecipes(Plan, *VPEVL);
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ SmallVector<VPRecipeBase *> ToRemove;
+ for (VPRecipeBase &Phi : HeaderVPBB->phis())
+ if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
+ transformWidenIVRecipestoEVLRecipes(WidenIV, Plan, VPEVL);
+ ToRemove.push_back(WidenIV);
+ }
+ for (VPRecipeBase *R : ToRemove)
+ R->eraseFromParent();
+
// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 8bdb3133243582..9d64f5c03274e1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -156,7 +156,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return true; })
.Case<VPInstruction>([&](const VPInstruction *I) {
- if (I->getOpcode() != Instruction::Add) {
+ if ((I->getOpcode() != Instruction::Add) &&
+ (I->getOpcode() != Instruction::Mul)) {
errs()
<< "EVL is used as an operand in non-VPInstruction::Add\n";
return false;
@@ -166,11 +167,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
"users\n";
return false;
}
- if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
- errs() << "Result of VPInstruction::Add with EVL operand is "
- "not used by VPEVLBasedIVPHIRecipe\n";
- return false;
- }
return true;
})
.Default([&](const VPUser *U) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
index e40f51fd7bd705..27e8bb618803e3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll
@@ -8,14 +8,55 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
; CHECK-LABEL: define void @test_wide_integer_induction(
; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP10]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[ENTRY]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
+; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VEC_PHI]], ptr align 8 [[TMP14]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]])
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 1, [[TMP20]]
+; CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 [[TMP16]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]])
+; CHECK-NEXT: [[TMP17]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VEC_PHI]], <vscale x 2 x i64> [[TMP19]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]])
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[TMP8]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: store i64 [[IV]], ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]]
+; CHECK-NEXT: store i64 [[IV1]], ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
@@ -34,6 +75,86 @@ for.cond.cleanup:
ret void
}
+define void @test_wide_fp_induction(ptr noalias %a, i64 %N) {
+; CHECK-LABEL: define void @test_wide_fp_induction(
+; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/115274
More information about the llvm-commits mailing list