[llvm] [VPlan] Add new VPInstruction opcode for header mask. (PR #89603)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 22 05:50:10 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
This patch adds a new VPInstruction::HeaderMask opcode to model the abstract header mask used for tail folding. It will be lowered later depending on target preference: either to an active-lane-mask, an explicit vector length, or a wide compare of the canonical IV against the backedge-taken count.
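For illustration, the three lowering strategies roughly correspond to the following IR-level patterns. This is a hand-written sketch with illustrative value names (`%vec.iv`, `%btc.splat`, `%remaining` are assumptions), not output produced by this patch:

```llvm
; (1) Wide compare of a wide canonical IV against the splatted
;     backedge-taken count:
%mask = icmp ule <4 x i32> %vec.iv, %btc.splat

; (2) Active-lane-mask intrinsic on the scalar index and trip count:
%mask.alm = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %tc)

; (3) Explicit vector length: no mask at all; the number of lanes to
;     process is capped by an explicit element count instead:
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %remaining, i32 4, i1 false)
```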
Similarly to https://github.com/llvm/llvm-project/pull/82270, it would be good to clarify/agree on the terminology w.r.t. recipes/opcodes that cannot be code-gen'd directly (i.e. require further gradual lowering).
NOTE: some tests failed or needed updating, because widened IVs are now replaced by scalar steps when their only use was the earlier wide compare. This could be fixed by adding a suitable wide canonical IV as operand to the header-mask recipe, exactly preserving the original behavior. Alternatively we could keep the current behavior of the patch and update the tests. Or we could introduce a wide induction PHI instead of VPWidenCanonicalIVRecipe; currently we *only* use a wide IV for VPWidenCanonicalIVRecipe if there was a suitable IV in the original loop, *even* if the mask compare is the *only* wide use. Either never or always using a wide PHI would be more consistent (or we could eventually make a more informed cost-based decision).
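For example (schematic, condensed from the dont-fold-tail-for-divisible-TC.ll changes below), the dedicated vector IV phi disappears and the mask operand is rebuilt from the scalar IV:

```llvm
; Before: dedicated vector IV phi, kept alive only by the mask compare.
%vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ],
                         [ %vec.ind.next, %loop.latch ]

; After: broadcast the scalar IV and add a step vector each iteration.
%splatinsert = insertelement <4 x i32> poison, i32 %index, i64 0
%splat = shufflevector <4 x i32> %splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
%vec.iv = add <4 x i32> %splat, <i32 0, i32 1, i32 2, i32 3>
```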
---
Patch is 86.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89603.diff
9 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+5-11)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+73-122)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+3)
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll (+20-19)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll (+129-114)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-predselect.ll (+37-28)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+6-6)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 33c4decd58a6c2..70beb4b1bfd415 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8035,21 +8035,14 @@ void VPRecipeBuilder::createHeaderMask() {
return;
}
- // Introduce the early-exit compare IV <= BTC to form header block mask.
- // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
- // constructing the desired canonical IV in the header block as its first
- // non-phi instructions.
-
+ // Introduce an abstract header-mask VPInstruction. This will be lowered later
+ // depending on target preference.
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
- HeaderVPBB->insert(IV, NewInsertionPoint);
-
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
- VPValue *BlockMask = nullptr;
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
- BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
+ VPValue *BlockMask =
+ Builder.createNaryOp(VPInstruction::HeaderMask, {Plan.getCanonicalIV()});
BlockMaskCache[Header] = BlockMask;
}
@@ -8555,6 +8548,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
// TODO: try to put it close to addActiveLaneMask().
if (CM.foldTailWithEVL())
VPlanTransforms::addExplicitVectorLength(*Plan);
+ VPlanTransforms::lowerRecipes(*Plan);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c74329a0bcc4ac..bd472d770803e0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1180,6 +1180,9 @@ class VPInstruction : public VPRecipeWithIRFlags {
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
+ // An abstract representation of the vector loop's header mask, to be lowered
+ // later depending on target preference.
+ HeaderMask,
};
private:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9ec422ec002c82..87bcea50b21572 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -132,6 +132,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
+ case VPInstruction::HeaderMask:
return false;
default:
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 007dc3f89b3fb9..6d643b81b2c88e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -434,44 +434,6 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
}
}
-/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
-/// recipe, if it exists.
-static void removeRedundantCanonicalIVs(VPlan &Plan) {
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
- for (VPUser *U : CanonicalIV->users()) {
- WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
- if (WidenNewIV)
- break;
- }
-
- if (!WidenNewIV)
- return;
-
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
- auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
-
- if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
- WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
- continue;
-
- // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
- // everything WidenNewIV's users need. That is, WidenOriginalIV will
- // generate a vector phi or all users of WidenNewIV demand the first lane
- // only.
- if (any_of(WidenOriginalIV->users(),
- [WidenOriginalIV](VPUser *U) {
- return !U->usesScalars(WidenOriginalIV);
- }) ||
- vputils::onlyFirstLaneUsed(WidenNewIV)) {
- WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
- WidenNewIV->eraseFromParent();
- return;
- }
- }
-}
-
/// Returns true if \p R is dead and can be removed.
static bool isDeadRecipe(VPRecipeBase &R) {
using namespace llvm::PatternMatch;
@@ -1086,7 +1048,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}
void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
- removeRedundantCanonicalIVs(Plan);
removeRedundantInductionCasts(Plan);
simplifyRecipes(Plan, SE.getContext());
@@ -1203,52 +1164,33 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
return LaneMaskPhi;
}
-/// Collect all VPValues representing a header mask through the (ICMP_ULE,
-/// WideCanonicalIV, backedge-taken-count) pattern.
-/// TODO: Introduce explicit recipe for header-mask instead of searching
-/// for the header-mask pattern manually.
-static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
- SmallVector<VPValue *> WideCanonicalIVs;
- auto *FoundWidenCanonicalIVUser =
- find_if(Plan.getCanonicalIV()->users(),
- [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
- assert(count_if(Plan.getCanonicalIV()->users(),
- [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
- 1 &&
- "Must have at most one VPWideCanonicalIVRecipe");
- if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
- auto *WideCanonicalIV =
- cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
- WideCanonicalIVs.push_back(WideCanonicalIV);
- }
-
- // Also include VPWidenIntOrFpInductionRecipes that represent a widened
- // version of the canonical induction.
+/// Return the header mask recipe of the VPlan, if there is one.
+static VPInstruction *getHeaderMask(VPlan &Plan) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
- auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
- if (WidenOriginalIV && WidenOriginalIV->isCanonical())
- WideCanonicalIVs.push_back(WidenOriginalIV);
- }
+ auto R = find_if(*HeaderVPBB, [](VPRecipeBase &R) {
+ using namespace llvm::VPlanPatternMatch;
+ return match(&R, m_VPInstruction<VPInstruction::HeaderMask>(m_VPValue()));
+ });
+ return R == HeaderVPBB->end() ? nullptr : cast<VPInstruction>(&*R);
+}
- // Walk users of wide canonical IVs and collect to all compares of the form
- // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
- SmallVector<VPValue *> HeaderMasks;
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
- for (auto *Wide : WideCanonicalIVs) {
- for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
- auto *HeaderMask = dyn_cast<VPInstruction>(U);
- if (!HeaderMask || HeaderMask->getOpcode() != Instruction::ICmp ||
- HeaderMask->getPredicate() != CmpInst::ICMP_ULE ||
- HeaderMask->getOperand(1) != BTC)
- continue;
+static VPValue *getOrCreateWideCanonicalIV(VPlan &Plan,
+ VPRecipeBase *InsertPt) {
- assert(HeaderMask->getOperand(0) == Wide &&
- "WidenCanonicalIV must be the first operand of the compare");
- HeaderMasks.push_back(HeaderMask);
- }
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ for (VPRecipeBase &R : HeaderVPBB->phis()) {
+ auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R);
+ if (!WideIV || !WideIV->isCanonical() ||
+ Plan.getCanonicalIV()->getScalarType() != WideIV->getScalarType())
+ continue;
+ return WideIV;
}
- return HeaderMasks;
+
+ auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+ IV->insertBefore(InsertPt);
+ return IV;
}
void VPlanTransforms::addActiveLaneMask(
@@ -1258,30 +1200,23 @@ void VPlanTransforms::addActiveLaneMask(
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
-
- auto FoundWidenCanonicalIVUser =
- find_if(Plan.getCanonicalIV()->users(),
- [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
- assert(FoundWidenCanonicalIVUser &&
- "Must have widened canonical IV when tail folding!");
- auto *WideCanonicalIV =
- cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
+ VPValue *HeaderMask = getHeaderMask(Plan);
+ assert(HeaderMask && "Active-lane-mask not needed?");
VPSingleDefRecipe *LaneMask;
if (UseActiveLaneMaskForControlFlow) {
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
Plan, DataAndControlFlowWithoutRuntimeCheck);
} else {
- VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
- LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
- {WideCanonicalIV, Plan.getTripCount()}, nullptr,
- "active.lane.mask");
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBuilder B;
+ B.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+ LaneMask = B.createNaryOp(
+ VPInstruction::ActiveLaneMask,
+ {getOrCreateWideCanonicalIV(Plan, &*HeaderVPBB->getFirstNonPhi()),
+ Plan.getTripCount()},
+ nullptr, "active.lane.mask");
}
-
- // Walk users of WideCanonicalIV and replace all compares of the form
- // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
- // active-lane-mask.
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
- HeaderMask->replaceAllUsesWith(LaneMask);
+ HeaderMask->replaceAllUsesWith(LaneMask);
}
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -1307,6 +1242,10 @@ void VPlanTransforms::addActiveLaneMask(
/// ...
///
void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
+ VPValue *HeaderMask = getHeaderMask(Plan);
+ if (!HeaderMask)
+ return;
+
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto *CanonicalIVPHI = Plan.getCanonicalIV();
VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -1336,31 +1275,30 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
NextEVLIV->insertBefore(CanonicalIVIncrement);
EVLPhi->addOperand(NextEVLIV);
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
- for (VPUser *U : collectUsersRecursively(HeaderMask)) {
- auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
- if (!MemR)
- continue;
- assert(!MemR->isReverse() &&
- "Reversed memory operations not supported yet.");
- VPValue *OrigMask = MemR->getMask();
- assert(OrigMask && "Unmasked widen memory recipe when folding tail");
- VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
- if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
- auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
- N->insertBefore(L);
- L->replaceAllUsesWith(N);
- L->eraseFromParent();
- } else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
- auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
- N->insertBefore(S);
- S->eraseFromParent();
- } else {
- llvm_unreachable("unsupported recipe");
- }
+ for (VPUser *U : collectUsersRecursively(HeaderMask)) {
+ auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
+ if (!MemR)
+ continue;
+ assert(!MemR->isReverse() &&
+ "Reversed memory operations not supported yet.");
+ VPValue *OrigMask = MemR->getMask();
+ assert(OrigMask && "Unmasked widen memory recipe when folding tail");
+ VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
+ if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
+ auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
+ N->insertBefore(L);
+ L->replaceAllUsesWith(N);
+ L->eraseFromParent();
+ } else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
+ auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
+ N->insertBefore(S);
+ S->eraseFromParent();
+ } else {
+ llvm_unreachable("unsupported recipe");
}
- recursivelyDeleteDeadRecipes(HeaderMask);
}
+ recursivelyDeleteDeadRecipes(HeaderMask);
+
// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
@@ -1465,3 +1403,16 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
}
}
+
+void VPlanTransforms::lowerRecipes(VPlan &Plan) {
+ VPInstruction *HeaderMask = getHeaderMask(Plan);
+ if (!HeaderMask)
+ return;
+
+ VPValue *IV = getOrCreateWideCanonicalIV(Plan, HeaderMask);
+ VPBuilder Builder(HeaderMask);
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+ VPValue *M = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
+ HeaderMask->replaceAllUsesWith(M);
+ HeaderMask->eraseFromParent();
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 0cbc70713d9c10..86a29e0a11aafc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -105,6 +105,9 @@ struct VPlanTransforms {
/// VPCanonicalIVPHIRecipe is only used to control the loop after
/// this transformation.
static void addExplicitVectorLength(VPlan &Plan);
+
+ /// Lower abstract VPInstruction recipes to a concrete sequence of recipes
+ /// for which code can be generated.
+ static void lowerRecipes(VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 76ca2507b914cf..cc015ce465c472 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -37,7 +37,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize {
; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]]
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -158,13 +158,15 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
@@ -174,31 +176,30 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
-; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
-; CHECK: pred.store.if1:
+; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]]
; CHECK-NEXT: store i32 13, ptr [[TMP6]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
-; CHECK: pred.store.continue2:
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
-; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK: pred.store.if3:
+; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP8]]
; CHECK-NEXT: store i32 13, ptr [[TMP9]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.continue4:
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
-; CHECK: pred.store.if5:
+; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; CHECK: pred.store.continue6:
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue8:
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index e79f0983a63785..c18c2f27e0f437 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -11,9 +11,11 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/89603