[llvm-branch-commits] [llvm] [VPlan] Explicitly handle scalar pointer inductions. (PR #80273)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 1 03:03:13 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
Add a new PtrAdd opcode to VPInstruction that corresponds to
IRBuilder::CreatePtrAdd, which creates a GEP with source element type
i8.
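For illustration, a minimal sketch of the correspondence (Builder, Base, and Offset are hypothetical names, not from the patch):

```cpp
// Builder is an IRBuilder<>; Base is a ptr value, Offset an integer value.
Value *P = Builder.CreatePtrAdd(Base, Offset);
// ...which emits a byte-granularity GEP:
//   %p = getelementptr i8, ptr %base, i64 %offset
```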
This is then used to scalarize VPWidenPointerInductionRecipe: scalar steps
model the index increment, followed by a PtrAdd that computes the scalar
pointers.
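Roughly, the transformed plan then looks like the following (a sketch only; value names are illustrative and the exact printed form may differ):

```
vp<%steps> = SCALAR-STEPS vp<%iv>, ir<1>
EMIT vp<%next.gep> = ptradd ir<%ptr.start>, vp<%steps>
```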
Note that PtrAdd needs to be able to generate code either for only the first
lane or for all lanes. This may warrant introducing a separate recipe for the
scalarized case, one that can be created without relying on the underlying
IR.
---
Patch is 125.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80273.diff
18 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2-33)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+11-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+25-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+38-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+49-51)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+73-75)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll (+37-37)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+24-25)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll (+75-79)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll (+9-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+83-95)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+49-60)
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll (+31-30)
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction.ll (+34-37)
- (modified) llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll (+32-35)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+5-3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 17a0d01f18072..4ee878358f9bc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9233,42 +9233,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"Not a pointer induction according to InductionDescriptor!");
assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
"Unexpected type.");
+ assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
+ "Recipe should have been replaced");
auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
-
- if (onlyScalarsGenerated(State.VF)) {
- // This is the normalized GEP that starts counting at zero.
- Value *PtrInd = State.Builder.CreateSExtOrTrunc(
- CanonicalIV, IndDesc.getStep()->getType());
- // Determine the number of scalars we need to generate for each unroll
- // iteration. If the instruction is uniform, we only need to generate the
- // first lane. Otherwise, we generate all VF values.
- bool IsUniform = vputils::onlyFirstLaneUsed(this);
- assert((IsUniform || !State.VF.isScalable()) &&
- "Cannot scalarize a scalable VF");
- unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *PartStart =
- createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
-
- for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
- Value *Idx = State.Builder.CreateAdd(
- PartStart, ConstantInt::get(PtrInd->getType(), Lane));
- Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
-
- Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
- Value *SclrGep = emitTransformedIndex(
- State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
- IndDesc.getKind(), IndDesc.getInductionBinOp());
- SclrGep->setName("next.gep");
- State.set(this, SclrGep, VPIteration(Part, Lane));
- }
- }
- return;
- }
-
Type *PhiType = IndDesc.getStep()->getType();
// Build a pointer phi
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 7ed07fe5f413a..51576d9f5364e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -859,7 +859,7 @@ void VPlan::execute(VPTransformState *State) {
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
- if (WidenPhi->onlyScalarsGenerated(State->VF))
+ if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
continue;
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 97035146a2f4d..4904287412f8a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1156,6 +1156,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
+ PtrAdd,
};
private:
@@ -1736,7 +1737,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
void execute(VPTransformState &State) override;
/// Returns true if only scalar values will be generated.
- bool onlyScalarsGenerated(ElementCount VF);
+ bool onlyScalarsGenerated(bool Scalable);
/// Returns the induction descriptor for the recipe.
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
@@ -2502,6 +2503,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
Start, CanonicalIV, Step) {}
+ VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, VPValue *Start,
+ VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
+ FPMathOperator *FPBinOp)
+ : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
+ Kind(Kind), FPBinOp(FPBinOp) {}
+
~VPDerivedIVRecipe() override = default;
VPRecipeBase *clone() override {
@@ -2957,6 +2964,9 @@ class VPlan {
}
bool hasVF(ElementCount VF) { return VFs.count(VF); }
+ bool hasScalableVF() {
+ return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
+ }
bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 515dc41a55ea1..bd2f65935e479 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -43,6 +43,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
CachedTypes[OtherV] = ResTy;
return ResTy;
}
+ case VPInstruction::PtrAdd:
+ return inferScalarType(R->getOperand(0));
default:
break;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 77f2cf899b085..d6550821fb57a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::PtrAdd:
return false;
default:
return true;
@@ -489,6 +490,23 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
return ReducedPartRdx;
}
+ case VPInstruction::PtrAdd: {
+ if (vputils::onlyFirstLaneUsed(this)) {
+ auto *P =
+ Builder.CreatePtrAdd(State.get(getOperand(0), VPIteration(Part, 0)),
+ State.get(getOperand(1), VPIteration(Part, 0)));
+ State.set(this, P, VPIteration(Part, 0));
+ } else {
+ for (unsigned Lane = 0; Lane != State.VF.getKnownMinValue(); ++Lane) {
+ Value *P = Builder.CreatePtrAdd(
+ State.get(getOperand(0), VPIteration(Part, Lane)),
+ State.get(getOperand(1), VPIteration(Part, Lane)));
+
+ State.set(this, P, VPIteration(Part, Lane));
+ }
+ }
+ return nullptr;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -515,6 +533,8 @@ void VPInstruction::execute(VPTransformState &State) {
State.Builder.setFastMathFlags(getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *GeneratedValue = generateInstruction(State, Part);
+ if (!GeneratedValue)
+ continue;
if (!hasResult())
continue;
assert(GeneratedValue && "generateInstruction must produce a value");
@@ -598,6 +618,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
+ case VPInstruction::PtrAdd:
+ O << "ptradd";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -1686,9 +1709,9 @@ bool VPCanonicalIVPHIRecipe::isCanonical(
return StepC && StepC->isOne();
}
-bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
+bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool Scalable) {
return IsScalarAfterVectorization &&
- (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
+ (!Scalable || vputils::onlyFirstLaneUsed(this));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 71f5285f90236..6964f1bec2675 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -489,15 +489,18 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
}
}
-static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
+static VPValue *createScalarIVSteps(VPlan &Plan,
+ InductionDescriptor::InductionKind Kind,
ScalarEvolution &SE, Instruction *TruncI,
VPValue *StartV, VPValue *Step,
- VPBasicBlock::iterator IP) {
+ Instruction::BinaryOps InductionOpcode,
+ VPBasicBlock::iterator IP,
+ FPMathOperator *FPBinOp = nullptr) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
VPSingleDefRecipe *BaseIV = CanonicalIV;
- if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
- BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
+ if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
+ BaseIV = new VPDerivedIVRecipe(Kind, StartV, CanonicalIV, Step, FPBinOp);
HeaderVPBB->insert(BaseIV, IP);
}
@@ -526,7 +529,9 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VecPreheader->appendRecipe(Step->getDefiningRecipe());
}
- VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
+ VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
+ BaseIV, Step, InductionOpcode,
+ FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
HeaderVPBB->insert(Steps, IP);
return Steps;
}
@@ -537,6 +542,30 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+ if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
+ if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
+ continue;
+
+ const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
+ VPValue *StartV = Plan.getVPValueOrAddLiveIn(
+ ConstantInt::get(ID.getStep()->getType(), 0));
+ VPValue *StepV = PtrIV->getOperand(1);
+ VPRecipeBase *Steps =
+ createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, SE,
+ nullptr, StartV, StepV, Instruction::Add,
+ InsertPt)
+ ->getDefiningRecipe();
+
+ auto *Recipe =
+ new VPInstruction(VPInstruction::PtrAdd,
+ {PtrIV->getStartValue(), Steps->getVPSingleValue()},
+ PtrIV->getDebugLoc());
+
+ Recipe->insertAfter(Steps);
+ PtrIV->replaceAllUsesWith(Recipe);
+ continue;
+ }
+
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!WideIV)
continue;
@@ -546,9 +575,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
continue;
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
- WideIV->getStartValue(),
- WideIV->getStepValue(), InsertPt);
+ VPValue *Steps = createScalarIVSteps(
+ Plan, ID.getKind(), SE, WideIV->getTruncInst(), WideIV->getStartValue(),
+ WideIV->getStepValue(), ID.getInductionOpcode(), InsertPt,
+ dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()));
// Update scalar users of IV to use Step instead.
if (!HasOnlyVectorVFs)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 24c59fdb47b61..00ec396107dcb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -11,76 +11,74 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]])
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP17]])
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP4]], i32 2
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP18]], align 1
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP19]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[IND_END6:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
+; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX8]], 0
-; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX8]], 1
-; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP9]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP10]], i32 1
-; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
-; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 10000
-; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX3]], 0
+; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX3]], 1
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP22]]
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP23]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x ptr> [[TMP25]], ptr [[TMP24]], i32 1
+; CHECK-NEXT: [[TMP27:%.*]] = icmp ne <2 x ptr> [[TMP26]], zeroinitializer
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP27]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP28]])
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[TMP27]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP29]])
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP23]], i32 0
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP30]], align 1
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 2
+; CHECK-...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/80273
More information about the llvm-branch-commits mailing list