[llvm-branch-commits] [llvm] [VPlan] Model address separately. (PR #72164)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 13 14:06:54 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
Changes:
Move vector pointer generation to a separate VPInstruction opcode.
This untangles address computation from the memory recipes and is
also needed to enable explicit unrolling in VPlan in the future.
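To illustrate the effect, here is a rough sketch of how the change shows up in VPlan printouts (paraphrased, not copied verbatim from the updated tests such as vplan-printing.ll; the value names `vp<%3>`, `ir<%gep>`, and `ir<%l>` are illustrative). The address of a consecutive widened load is now produced by its own recipe instead of being computed inside the memory recipe:

```
; Before: the widened memory recipe computed its per-part pointer internally.
WIDEN ir<%l> = load ir<%gep>

; After: the vector pointer is modeled explicitly as a VPInstruction.
EMIT vp<%3> = create-vector-pointer ir<%gep>
WIDEN ir<%l> = load vp<%3>
```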
---
Patch is 333.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72164.diff
57 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+17-46)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+3-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+47)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+37-37)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+45-45)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll (+6-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+24-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll (+108-108)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+8-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaving.ll (+30-30)
- (modified) llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+33-33)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr35432.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr47437.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+8-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+39-39)
- (modified) llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll (+32-32)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+36-18)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+4-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ae8d306c44dd885..e3374724b04a144 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8231,13 +8231,24 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
bool Consecutive =
Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+ VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
+ if (Decision != LoopVectorizationCostModel::CM_GatherScatter &&
+ Decision != LoopVectorizationCostModel::CM_Interleave) {
+ auto *VectorPtr = Reverse
+ ? new VPInstruction(VPInstruction::CreateVectorPtr,
+ {Ptr, Ptr}, I->getDebugLoc())
+ : new VPInstruction(VPInstruction::CreateVectorPtr,
+ {Ptr}, I->getDebugLoc());
+ Builder.getInsertBlock()->appendRecipe(VectorPtr);
+ Ptr = VectorPtr;
+ }
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
- Consecutive, Reverse);
+ return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
+ Reverse);
StoreInst *Store = cast<StoreInst>(I);
- return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
- Mask, Consecutive, Reverse);
+ return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask,
+ Consecutive, Reverse);
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -9532,44 +9543,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
BlockInMaskParts[Part] = Mask;
}
- const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
- // Calculate the pointer for the specific unroll-part.
- Value *PartPtr = nullptr;
-
- // Use i32 for the gep index type when the value is constant,
- // or query DataLayout for a more suitable index type otherwise.
- const DataLayout &DL =
- Builder.GetInsertBlock()->getModule()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
- ? DL.getIndexType(PointerType::getUnqual(
- ScalarDataTy->getContext()))
- : Builder.getInt32Ty();
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = gep->isInBounds();
- if (isReverse()) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -Part * RunTimeVF
- Value *NumElt =
- Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
- PartPtr =
- Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
- PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
- }
-
- return PartPtr;
- };
-
// Handle Stores:
if (SI) {
State.setDebugLocFrom(SI->getDebugLoc());
@@ -9590,8 +9563,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
- auto *VecPtr =
- CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ auto *VecPtr = State.get(getAddr(), Part);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
@@ -9615,8 +9587,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
- auto *VecPtr =
- CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ auto *VecPtr = State.get(getAddr(), Part);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a26308a212bbd3c..be770e33e92a32b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1038,7 +1038,8 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
// canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
BranchOnCount,
- BranchOnCond
+ BranchOnCond,
+ CreateVectorPtr
};
private:
@@ -1146,6 +1147,7 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
+ case VPInstruction::CreateVectorPtr:
return true;
};
llvm_unreachable("switch should return");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6b3218dca1b18b0..1dac8a806d657cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -122,6 +122,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::CreateVectorPtr:
return false;
default:
return true;
@@ -404,6 +405,49 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
return CondBr;
}
+ case VPInstruction::CreateVectorPtr: {
+ // Calculate the pointer for the specific unroll-part.
+ Value *PartPtr = nullptr;
+ bool IsReverse = getNumOperands() > 1;
+ auto *MemR = cast<VPWidenMemoryInstructionRecipe>(*user_begin());
+ Type *ScalarDataTy =
+ MemR->isStore() ? cast<StoreInst>(&MemR->getIngredient())
+ ->getValueOperand()
+ ->getType()
+ : cast<LoadInst>(&MemR->getIngredient())->getType();
+ // Use i32 for the gep index type when the value is constant,
+ // or query DataLayout for a more suitable index type otherwise.
+ const DataLayout &DL =
+ Builder.GetInsertBlock()->getModule()->getDataLayout();
+ Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
+ ? DL.getIndexType(ScalarDataTy->getPointerTo())
+ : Builder.getInt32Ty();
+ Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+ bool InBounds = false;
+ if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+ InBounds = gep->isInBounds();
+ if (IsReverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+ // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+ Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
+ // NumElt = -Part * RunTimeVF
+ Value *NumElt =
+ Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane =
+ Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+ PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
+ PartPtr =
+ Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
+ } else {
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
+ PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
+ }
+
+ return PartPtr;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -483,6 +527,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCount:
O << "branch-on-count";
break;
+ case VPInstruction::CreateVectorPtr:
+ O << "create-vector-pointer";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index 4a8e07eaaf757fa..cbc4733cf5cf5fa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -179,8 +179,8 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i16> [[TMP6]] to <16 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP9]]
-; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16
+; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP11]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
@@ -193,18 +193,18 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i64 0
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> undef, i16 [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = mul <8 x i16> [[TMP15]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = lshr <8 x i16> [[TMP16]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i16> [[TMP17]] to <8 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i8> [[TMP18]], <8 x i8> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[INDEX4]] to i64
+; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[INDEX3]] to i64
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP20]]
; CHECK-NEXT: store <8 x i8> [[TMP19]], ptr [[TMP21]], align 1
-; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i32 [[INDEX4]], 8
-; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT9]], 1000
+; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i32 [[INDEX3]], 8
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000
; CHECK-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -268,7 +268,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
; CHECK: vec.epilog.ph:
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> undef, i16 [[TMP10]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
@@ -276,11 +276,11 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[C]], <8 x i16> [[TMP14]], <8 x i16> [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
-; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[INDEX2]] to i64
+; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[INDEX1]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: store <8 x i8> [[TMP16]], ptr [[TMP18]], align 1
-; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 8
-; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 1000
+; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX1]], 8
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 24d6d2d532aa0c2..24c59fdb47b6133 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -38,8 +38,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
+; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
@@ -55,22 +55,22 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX9]], 0
-; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX9]], 1
-; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP10]], i32 0
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP11]], i32 1
+; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX8]], 0
+; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX8]], 1
+; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP9]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP10]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i32 0
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 2
-; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 10000
+; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 10000
; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -132,8 +132,8 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
+; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP4]], align 4
; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
@@ -156,13 +156,13 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX7]], 0
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; CHECK-NEXT: store <2 x i64> [[VEC_IND8]], ptr [[TMP9]], align 4
-; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 2
; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <2 x i64> [[VEC_IND8]], <i64 2, i64 2>
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC4]]
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -224,8 +224,8 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2
+; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP5]], align 4
; CHECK-NEXT: stor...
[truncated]
``````````
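For reference, the reversed-pointer logic moved into `VPInstruction::CreateVectorPtr` above boils down to a small per-part offset computation. A minimal standalone sketch (hypothetical helper names, not LLVM API; it only mirrors the index math the patch builds with `CreateMul`/`CreateSub`/`CreateGEP`):

```cpp
#include <cassert>
#include <cstdint>

// Element offset of the first lane accessed by unroll part `Part` of a
// consecutive-but-reversed wide access, matching the IR built above:
//   NumElt   = -Part * RunTimeVF
//   LastLane =  1 - RunTimeVF
//   PartPtr  = Ptr + NumElt + LastLane
int64_t reversePartOffset(int64_t Part, int64_t RunTimeVF) {
  return -Part * RunTimeVF + (1 - RunTimeVF);
}

// In the non-reversed case the offset is simply Part * RunTimeVF
// (createStepForVF in the patch).
int64_t forwardPartOffset(int64_t Part, int64_t RunTimeVF) {
  return Part * RunTimeVF;
}

int main() {
  // With RunTimeVF = 4 and two unroll parts, the reversed access for part 0
  // starts 3 elements below the base pointer, and part 1 starts 7 below,
  // so each wide load/store begins at the last vector element of its part.
  assert(reversePartOffset(0, 4) == -3);
  assert(reversePartOffset(1, 4) == -7);
  assert(forwardPartOffset(1, 4) == 4);
  return 0;
}
```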
https://github.com/llvm/llvm-project/pull/72164
More information about the llvm-branch-commits mailing list