[llvm] [VPlan] Add transformation to narrow interleave groups. (PR #106441)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 13:34:37 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
Changes:
This patch adds a new narrowInterleaveGroups transform, which tries to
convert a plan containing interleave groups with VF elements into a plan
that instead replaces the interleave groups with wide loads and stores
processing VF elements.
This is effectively a very simple form of loop-aware SLP, where interleave
groups are used to identify candidates.
This initial version is quite restricted and hopefully serves as a
starting point for how best to model those kinds of transforms.
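As a rough sketch of the kind of loop this targets (the code and names below are illustrative only and not taken from this patch or its tests):

```cpp
// Illustrative sketch only; names are hypothetical and not from this patch.
struct Pair {
  double x, y;
};

void mulRows(Pair *Dst, const Pair *Src, const double *C, long N) {
  for (long I = 0; I != N; ++I) {
    // With VF = 2, the loads of Src[I].x and Src[I].y form a factor-2
    // interleave group with 2 members (== VF), and likewise for the stores
    // to Dst[I]; the load of C[I] is a plain wide load used by both lanes.
    // The narrowed plan processes one original iteration per vector
    // iteration: a wide load of Src[I], a uniform scalar load of C[I] and a
    // wide store to Dst[I], with the canonical IV stepping by 1 instead of
    // VF.
    Dst[I].x = Src[I].x * C[I];
    Dst[I].y = Src[I].y * C[I];
  }
}
```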
Depends on https://github.com/llvm/llvm-project/pull/106431.
Fixes https://github.com/llvm/llvm-project/issues/82936
---
Patch is 249.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106441.diff
20 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+8-2)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+3-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+7-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+16-25)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+170-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+5-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+11-19)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+6-10)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+15-84)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll (+7-10)
- (added) llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll (+755)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll (+6-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll (+4-8)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll (+5-3)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+9-16)
- (added) llvm/test/Transforms/LoopVectorize/transform-narrow-interleave-to-widen-memory.ll (+1677)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+2-3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 00eec0a6f7b14e..5951873a960af2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -220,9 +220,15 @@ class VPBuilder {
new VPInstruction(Instruction::ICmp, Pred, A, B, DL, Name));
}
- VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL,
+ VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
const Twine &Name = "") {
- return createInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, DL, Name);
+ return tryInsertInstruction(new VPInstruction(
+ Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(false), DL, Name));
+ }
+ VPValue *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = {},
+ const Twine &Name = "") {
+ return tryInsertInstruction(new VPInstruction(
+ Ptr, Offset, VPRecipeWithIRFlags::GEPFlagsTy(true), DL, Name));
}
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index db4631e19c11d3..3eb830bcc02098 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7506,6 +7506,7 @@ LoopVectorizationPlanner::executePlan(
VPlanTransforms::unrollByUF(BestVPlan, BestUF,
OrigLoop->getHeader()->getModule()->getContext());
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
+ VPlanTransforms::narrowInterleaveGroups(BestVPlan, BestVF);
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
<< ", UF=" << BestUF << '\n');
@@ -9005,8 +9006,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
- VPlanTransforms::createInterleaveGroups(InterleaveGroups, RecipeBuilder,
- CM.isScalarEpilogueAllowed());
+ VPlanTransforms::createInterleaveGroups(
+ *Plan, InterleaveGroups, RecipeBuilder, CM.isScalarEpilogueAllowed());
for (ElementCount VF : Range)
Plan->addVF(VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 6ddbfcf0ecfe58..a18ab844c5ebec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -954,7 +954,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
// FIXME: Model VF * UF computation completely in VPlan.
- assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
unsigned UF = getUF();
if (VF.getNumUsers()) {
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bbcfaf9e19cd0c..f841355d320930 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -956,7 +956,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
};
-protected:
struct GEPFlagsTy {
char IsInBounds : 1;
GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
@@ -1307,6 +1306,13 @@ class VPInstruction : public VPRecipeWithIRFlags,
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
}
+ VPInstruction(VPValue *Ptr, VPValue *Offset, GEPFlagsTy Flags = {false},
+ DebugLoc DL = {}, const Twine &Name = "")
+ : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
+ ArrayRef<VPValue *>({Ptr, Offset}),
+ GEPFlagsTy(Flags), DL),
+ Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
+
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index dacba152611c19..96b16164738181 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -48,6 +48,9 @@ extern cl::opt<unsigned> ForceTargetInstructionCost;
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
+ case VPInstructionSC: {
+ return !Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode());
+ }
case VPInterleaveSC:
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
case VPWidenStoreEVLSC:
@@ -63,6 +66,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
+ case VPVectorPointerSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -644,7 +648,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
"can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), /* IsScalar */ true);
Value *Addend = State.get(getOperand(1), /* IsScalar */ true);
- return Builder.CreatePtrAdd(Ptr, Addend, Name);
+ return isInBounds() ? Builder.CreateInBoundsPtrAdd(Ptr, Addend, Name)
+ : Builder.CreatePtrAdd(Ptr, Addend, Name);
}
case VPInstruction::ResumePhi: {
Value *IncomingFromVPlanPred =
@@ -2470,15 +2475,12 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
unsigned InterleaveFactor = Group->getFactor();
auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
- // Prepare for the new pointers.
- unsigned Index = Group->getIndex(Instr);
-
// TODO: extend the masked interleaved-group support to reversed access.
VPValue *BlockInMask = getMask();
assert((!BlockInMask || !Group->isReverse()) &&
"Reversed masked interleave-group not supported.");
- Value *Idx;
+ Value *Index;
// If the group is reverse, adjust the index to refer to the last vector lane
// instead of the first. We adjust the index from the first vector lane,
// rather than directly getting the pointer for lane VF - 1, because the
@@ -2486,35 +2488,24 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
if (Group->isReverse()) {
Value *RuntimeVF =
getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
- Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
- Idx = State.Builder.CreateMul(Idx,
- State.Builder.getInt32(Group->getFactor()));
- Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index));
- Idx = State.Builder.CreateNeg(Idx);
- } else
- Idx = State.Builder.getInt32(-Index);
+ Index = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
+ Index = State.Builder.CreateMul(Index,
+ State.Builder.getInt32(Group->getFactor()));
+ Index = State.Builder.CreateNeg(Index);
+ } else {
+ // TODO: Drop redundant 0-index GEP as follow-up.
+ Index = State.Builder.getInt32(0);
+ }
VPValue *Addr = getAddr();
Value *ResAddr = State.get(Addr, VPLane(0));
if (auto *I = dyn_cast<Instruction>(ResAddr))
State.setDebugLocFrom(I->getDebugLoc());
- // Notice current instruction could be any index. Need to adjust the address
- // to the member of index 0.
- //
- // E.g. a = A[i+1]; // Member of index 1 (Current instruction)
- // b = A[i]; // Member of index 0
- // Current pointer is pointed to A[i+1], adjust it to A[i].
- //
- // E.g. A[i+1] = a; // Member of index 1
- // A[i] = b; // Member of index 0
- // A[i+2] = c; // Member of index 2 (Current instruction)
- // Current pointer is pointed to A[i+2], adjust it to A[i].
-
bool InBounds = false;
if (auto *gep = dyn_cast<GetElementPtrInst>(ResAddr->stripPointerCasts()))
InBounds = gep->isInBounds();
- ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Idx, "", InBounds);
+ ResAddr = State.Builder.CreateGEP(ScalarTy, ResAddr, Index, "", InBounds);
State.setDebugLocFrom(Instr->getDebugLoc());
Value *PoisonVec = PoisonValue::get(VecTy);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a878613c4ba483..58188379c7fabb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -29,6 +29,9 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
using namespace llvm;
void VPlanTransforms::VPInstructionsToVPRecipes(
@@ -710,6 +713,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
// TODO: Further simplifications are possible
// 1. Replace inductions with constants.
// 2. Replace vector loop region with VPBasicBlock.
+ //
}
/// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1590,14 +1594,19 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
void VPlanTransforms::createInterleaveGroups(
- const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> &InterleaveGroups,
+ VPlan &Plan,
+ const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
+ &InterleaveGroups,
VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed) {
+ if (InterleaveGroups.empty())
+ return;
+
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
for (const auto *IG : InterleaveGroups) {
- auto *Recipe =
- cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
@@ -1607,9 +1616,38 @@ void VPlanTransforms::createInterleaveGroups(
bool NeedsMaskForGaps =
IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed;
- auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,
- Recipe->getMask(), NeedsMaskForGaps);
- VPIG->insertBefore(Recipe);
+
+ Instruction *IRInsertPos = IG->getInsertPos();
+ auto *InsertPos =
+ cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
+
+ // Get or create the start address for the interleave group.
+ auto *Start =
+ cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
+ VPValue *Addr = Start->getAddr();
+ if (!VPDT.properlyDominates(Addr->getDefiningRecipe(), InsertPos)) {
+ bool InBounds = false;
+ if (auto *Gep = dyn_cast<GetElementPtrInst>(
+ getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
+ InBounds = Gep->isInBounds();
+
+ // We cannot re-use the address of the first member because it does not
+ // dominate the insert position. Use the address of the insert position
+ // and create a PtrAdd to adjust the index to start at the first member.
+ APInt Offset(32,
+ getLoadStoreType(IRInsertPos)->getScalarSizeInBits() / 8 *
+ IG->getIndex(IRInsertPos),
+ /*IsSigned=*/true);
+ VPValue *OffsetVPV = Plan.getOrAddLiveIn(
+ ConstantInt::get(IRInsertPos->getParent()->getContext(), -Offset));
+ VPBuilder B(InsertPos);
+ Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
+ : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
+ }
+ auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
+ InsertPos->getMask(), NeedsMaskForGaps);
+ VPIG->insertBefore(InsertPos);
+
unsigned J = 0;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *Member = IG->getMember(i)) {
@@ -1623,3 +1661,129 @@ void VPlanTransforms::createInterleaveGroups(
}
}
}
+
+static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
+ if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(V->getDefiningRecipe())) {
+ if (W->getMask())
+ return false;
+ return !W->getMask() && (R0->getOperand(0) == V || R0->getOperand(1) == V);
+ }
+
+ if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(V->getDefiningRecipe())) {
+ return IR->getInterleaveGroup()->getFactor() ==
+ IR->getInterleaveGroup()->getNumMembers() &&
+ IR->getVPValue(Idx) == V;
+ }
+ return false;
+}
+
+/// Returns true if \p IR is a full interleave group with factor and number of
+/// members both equal to \p VF.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
+ ElementCount VF) {
+ if (!IR)
+ return false;
+ auto IG = IR->getInterleaveGroup();
+ return IG->getFactor() == IG->getNumMembers() &&
+ IG->getNumMembers() == VF.getFixedValue();
+}
+
+void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
+ using namespace llvm::VPlanPatternMatch;
+ if (VF.isScalable())
+ return;
+
+ SmallVector<VPInterleaveRecipe *> StoreGroups;
+ for (auto &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
+ if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+ isa<VPCanonicalIVPHIRecipe>(&R))
+ continue;
+
+ // Bail out on recipes not supported at the moment:
+ // * phi recipes other than the canonical induction
+ // * recipes writing to memory except interleave groups
+ // Only support plans with a canonical induction phi.
+ if (R.isPhi())
+ return;
+
+ auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
+ if (R.mayWriteToMemory() && !IR)
+ return;
+
+ if (!IR)
+ continue;
+
+ if (!isConsecutiveInterleaveGroup(IR, VF))
+ return;
+ if (IR->getStoredValues().empty())
+ continue;
+
+ auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
+ IR->getStoredValues()[0]->getDefiningRecipe());
+ if (!Lane0)
+ return;
+ for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
+ auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe());
+ if (!R || R->getOpcode() != Lane0->getOpcode())
+ return;
+ // Work around captured structured bindings being a C++20 extension.
+ auto Idx = I;
+ if (any_of(R->operands(), [Lane0, Idx](VPValue *V) {
+ return !supportedLoad(Lane0, V, Idx);
+ }))
+ return;
+ }
+
+ StoreGroups.push_back(IR);
+ }
+
+ if (StoreGroups.empty())
+ return;
+
+ // Narrow operation tree rooted at store groups.
+ for (auto *StoreGroup : StoreGroups) {
+ auto *Lane0 = cast<VPWidenRecipe>(
+ StoreGroup->getStoredValues()[0]->getDefiningRecipe());
+
+ unsigned LoadGroupIdx =
+ isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
+ : 0;
+ unsigned WideLoadIdx = 1 - LoadGroupIdx;
+ auto *LoadGroup = cast<VPInterleaveRecipe>(
+ Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());
+
+ auto *WideLoad = cast<VPWidenLoadRecipe>(
+ Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());
+
+ // Narrow wide load to uniform scalar load, as transformed VPlan will only
+ // process one original iteration.
+ auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
+ WideLoad->operands(), true);
+ // Narrow interleave group to wide load, as transformed VPlan will only
+ // process one original iteration.
+ auto *L = new VPWidenLoadRecipe(
+ *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
+ LoadGroup->getAddr(), LoadGroup->getMask(), true, false,
+ LoadGroup->getDebugLoc());
+ L->insertBefore(LoadGroup);
+ N->insertBefore(LoadGroup);
+ Lane0->setOperand(LoadGroupIdx, L);
+ Lane0->setOperand(WideLoadIdx, N);
+
+ auto *S = new VPWidenStoreRecipe(
+ *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
+ StoreGroup->getAddr(), Lane0, nullptr, true, false,
+ StoreGroup->getDebugLoc());
+ S->insertBefore(StoreGroup);
+ StoreGroup->eraseFromParent();
+ }
+
+ // Adjust induction to reflect that the transformed plan only processes one
+ // original iteration.
+ auto *CanIV = Plan.getCanonicalIV();
+ VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
+ Inc->setOperand(
+ 1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ removeDeadRecipes(Plan);
+ LLVM_DEBUG(dbgs() << "Narrowed interleave\n");
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index f4a17aec42b244..efe4bde8998fd4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -114,11 +114,15 @@ struct VPlanTransforms {
// widening its memory instructions with a single VPInterleaveRecipe at its
// insertion point.
static void createInterleaveGroups(
- const SmallPtrSetImpl<const InterleaveGroup<Instruction> *> &InterleaveGroups,
+ VPlan &Plan,
+ const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
+ &InterleaveGroups,
VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed);
/// Remove dead recipes from \p Plan.
static void removeDeadRecipes(VPlan &Plan);
+
+ static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF);
};
} // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
index 87674f611251cc..997ef7466d5cfc 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll
@@ -14,8 +14,7 @@ define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 0959d913fd0cd5..ba4145217c3ba5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -41,13 +41,11 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/106441