[llvm] [VPlan] Sink predicated stores with complementary masks. (PR #168771)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 07:19:29 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/168771
>From 12e674cb80360b97172564242d163b9ee8386d73 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 19 Nov 2025 16:23:28 +0000
Subject: [PATCH 1/2] [VPlan] Sink predicated stores with complementary masks.
Extend the logic to hoist predicated loads
(https://github.com/llvm/llvm-project/pull/168373) to sink predicated
stores with complementary masks in a similar fashion.
The patch refactors some of the existing logic for legality checks to be
shared between hoisting and sinking, and adds a new sinking transform on
top.
With respect to the legality checks, for sinking stores the code also
checks for loads that may alias, not only stores.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 306 ++++++++++++------
.../Transforms/Vectorize/VPlanTransforms.h | 7 +
...predicated-loads-with-predicated-stores.ll | 284 +++++-----------
4 files changed, 301 insertions(+), 297 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4a89f7dd8672e..9b727a7998392 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8369,6 +8369,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
// Now optimize the initial VPlan.
VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop);
+ VPlanTransforms::sinkPredicatedStores(*Plan, *PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f7281283bae81..7bc4eb97b94e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -139,35 +139,51 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return true;
}
-// Check if a load can be hoisted by verifying it doesn't alias with any stores
-// in blocks between FirstBB and LastBB using scoped noalias metadata.
-static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
- VPBasicBlock *FirstBB,
- VPBasicBlock *LastBB) {
- // Get the load's memory location and check if it aliases with any stores
- // using scoped noalias metadata.
- auto LoadLoc = vputils::getMemoryLocation(*Load);
- if (!LoadLoc || !LoadLoc->AATags.Scope)
+// Check if a memory operation doesn't alias with memory operations in blocks
+// between FirstBB and LastBB using scoped noalias metadata.
+// For load hoisting, we only check writes in one direction.
+// For store sinking, we check both reads and writes bidirectionally.
+static bool canHoistOrSinkWithNoAliasCheck(
+ const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
+ bool CheckReads,
+ const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
+ if (!MemLoc.AATags.Scope)
return false;
- const AAMDNodes &LoadAA = LoadLoc->AATags;
+ const AAMDNodes &MemAA = MemLoc.AATags;
+
for (VPBlockBase *Block = FirstBB; Block;
Block = Block->getSingleSuccessor()) {
- // This function assumes a simple linear chain of blocks. If there are
- // multiple successors, we would need more complex analysis.
assert(Block->getNumSuccessors() <= 1 &&
"Expected at most one successor in block chain");
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
- if (R.mayWriteToMemory()) {
- auto Loc = vputils::getMemoryLocation(R);
- // Bail out if we can't get the location or if the scoped noalias
- // metadata indicates potential aliasing.
- if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
- LoadAA.Scope, Loc->AATags.NoAlias))
- return false;
- }
+ if (ExcludeRecipes && ExcludeRecipes->contains(&R))
+ continue;
+
+ // Skip recipes that don't need checking.
+ if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
+ continue;
+
+ auto Loc = vputils::getMemoryLocation(R);
+ if (!Loc)
+ // Conservatively assume aliasing for memory operations without
+ // location.
+ return false;
+
+ // For reads, check if they don't alias in the reverse direction and
+ // skip if so.
+ if (CheckReads && R.mayReadFromMemory() &&
+ !ScopedNoAliasAAResult::mayAliasInScopes(Loc->AATags.Scope,
+ MemAA.NoAlias))
+ continue;
+
+ // Check if the memory operations may alias in the forward direction.
+ if (ScopedNoAliasAAResult::mayAliasInScopes(MemAA.Scope,
+ Loc->AATags.NoAlias))
+ return false;
}
+
if (Block == LastBB)
break;
}
@@ -4128,119 +4144,217 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
}
}
-// Returns the intersection of metadata from a group of loads.
-static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
- VPIRMetadata CommonMetadata = *Loads.front();
- for (VPReplicateRecipe *Load : drop_begin(Loads))
- CommonMetadata.intersect(*Load);
+// Collect common metadata from a group of replicate recipes by intersecting
+// metadata from all recipes in the group.
+static VPIRMetadata getCommonMetadata(ArrayRef<VPReplicateRecipe *> Recipes) {
+ VPIRMetadata CommonMetadata = *Recipes.front();
+ for (VPReplicateRecipe *Recipe : drop_begin(Recipes))
+ CommonMetadata.intersect(*Recipe);
return CommonMetadata;
}
-void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
- const Loop *L) {
+template <unsigned Opcode>
+static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
+collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L) {
+ static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
+ "Only Load and Store opcodes supported");
+ constexpr bool IsLoad = (Opcode == Instruction::Load);
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPTypeAnalysis TypeInfo(Plan);
- VPDominatorTree VPDT(Plan);
- // Group predicated loads by their address SCEV.
- DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
+ // Group predicated operations by their address SCEV.
+ DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> RecipesByAddress;
for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR || RepR->getOpcode() != Instruction::Load ||
- !RepR->isPredicated())
+ if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
continue;
- VPValue *Addr = RepR->getOperand(0);
+ // For loads, operand 0 is address; for stores, operand 1 is address.
+ VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
if (!isa<SCEVCouldNotCompute>(AddrSCEV))
- LoadsByAddress[AddrSCEV].push_back(RepR);
+ RecipesByAddress[AddrSCEV].push_back(RepR);
}
}
- // For each address, collect loads with complementary masks, sort by
- // dominance, and use the earliest load.
- for (auto &[Addr, Loads] : LoadsByAddress) {
- if (Loads.size() < 2)
+ // For each address, collect operations with the same or complementary masks.
+ SmallVector<SmallVector<VPReplicateRecipe *, 4>> AllGroups;
+ auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) {
+ return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
+ };
+ for (auto &[Addr, Recipes] : RecipesByAddress) {
+ if (Recipes.size() < 2)
continue;
- // Collect groups of loads with complementary masks.
- SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
- for (VPReplicateRecipe *&LoadI : Loads) {
- if (!LoadI)
+ // Collect groups with the same or complementary masks.
+ for (VPReplicateRecipe *&RecipeI : Recipes) {
+ if (!RecipeI)
continue;
- VPValue *MaskI = LoadI->getMask();
- Type *TypeI = TypeInfo.inferScalarType(LoadI);
+ VPValue *MaskI = RecipeI->getMask();
+ Type *TypeI = GetLoadStoreValueType(RecipeI);
SmallVector<VPReplicateRecipe *, 4> Group;
- Group.push_back(LoadI);
- LoadI = nullptr;
+ Group.push_back(RecipeI);
+ RecipeI = nullptr;
- // Find all loads with the same type.
- for (VPReplicateRecipe *&LoadJ : Loads) {
- if (!LoadJ)
+ // Find all operations with the same or complementary masks.
+ bool HasComplementaryMask = false;
+ for (VPReplicateRecipe *&RecipeJ : Recipes) {
+ if (!RecipeJ)
continue;
- Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
+ VPValue *MaskJ = RecipeJ->getMask();
+ Type *TypeJ = GetLoadStoreValueType(RecipeJ);
if (TypeI == TypeJ) {
- Group.push_back(LoadJ);
- LoadJ = nullptr;
+ // Check if any operation in the group has a complementary mask with
+ // another, that is M1 == NOT(M2) or M2 == NOT(M1).
+ HasComplementaryMask |= match(MaskI, m_Not(m_Specific(MaskJ))) ||
+ match(MaskJ, m_Not(m_Specific(MaskI)));
+ Group.push_back(RecipeJ);
+ RecipeJ = nullptr;
}
}
- // Check if any load in the group has a complementary mask with another,
- // that is M1 == NOT(M2) or M2 == NOT(M1).
- bool HasComplementaryMask =
- any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
- VPValue *MaskJ = Load->getMask();
- return match(MaskI, m_Not(m_Specific(MaskJ))) ||
- match(MaskJ, m_Not(m_Specific(MaskI)));
- });
+ if (HasComplementaryMask) {
+ assert(Group.size() >= 2 && "must have at least 2 entries");
+ AllGroups.push_back(std::move(Group));
+ }
+ }
+ }
+
+ return AllGroups;
+}
+
+// Find the recipe with minimum alignment in the group.
+template <typename InstType>
+static VPReplicateRecipe *
+findRecipeWithMinAlign(ArrayRef<VPReplicateRecipe *> Group) {
+ return *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return cast<InstType>(A->getUnderlyingInstr())->getAlign() <
+ cast<InstType>(B->getUnderlyingInstr())->getAlign();
+ });
+}
+
+void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L) {
+ auto Groups =
+ collectComplementaryPredicatedMemOps<Instruction::Load>(Plan, SE, L);
+ if (Groups.empty())
+ return;
+
+ VPDominatorTree VPDT(Plan);
- if (HasComplementaryMask)
- LoadGroups.push_back(std::move(Group));
+ // Process each group of loads.
+ for (auto &Group : Groups) {
+ // Sort loads by dominance order, with earliest (most dominating) first.
+ sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return VPDT.properlyDominates(A, B);
+ });
+
+ // Try to use the earliest (most dominating) load to replace all others.
+ VPReplicateRecipe *EarliestLoad = Group[0];
+ VPBasicBlock *FirstBB = EarliestLoad->getParent();
+ VPBasicBlock *LastBB = Group.back()->getParent();
+
+ // Check that the load doesn't alias with stores between first and last.
+ auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
+ if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB,
+ /*CheckReads=*/false))
+ continue;
+
+ // Collect common metadata from all loads in the group.
+ VPIRMetadata CommonMetadata = getCommonMetadata(Group);
+
+ // Find the load with minimum alignment to use.
+ auto *LoadWithMinAlign = findRecipeWithMinAlign<LoadInst>(Group);
+
+ // Create an unpredicated version of the earliest load with common
+ // metadata.
+ auto *UnpredicatedLoad = new VPReplicateRecipe(
+ LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
+ /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad,
+ CommonMetadata);
+
+ UnpredicatedLoad->insertBefore(EarliestLoad);
+
+ // Replace all loads in the group with the unpredicated load.
+ for (VPReplicateRecipe *Load : Group) {
+ Load->replaceAllUsesWith(UnpredicatedLoad);
+ Load->eraseFromParent();
}
+ }
+}
- // For each group, check memory dependencies and hoist the earliest load.
- for (auto &Group : LoadGroups) {
- // Sort loads by dominance order, with earliest (most dominating) first.
- sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
- return VPDT.properlyDominates(A, B);
- });
+static bool
+canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
+ auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front());
+ if (!StoreLoc || !StoreLoc->AATags.Scope)
+ return false;
- VPReplicateRecipe *EarliestLoad = Group.front();
- VPBasicBlock *FirstBB = EarliestLoad->getParent();
- VPBasicBlock *LastBB = Group.back()->getParent();
+ // When sinking a group of stores, all members of the group alias each other.
+ // Skip them during the alias checks.
+ SmallPtrSet<VPRecipeBase *, 4> StoresToSinkSet(StoresToSink.begin(),
+ StoresToSink.end());
- // Check that the load doesn't alias with stores between first and last.
- if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
- continue;
+ VPBasicBlock *FirstBB = StoresToSink.front()->getParent();
+ VPBasicBlock *LastBB = StoresToSink.back()->getParent();
+ return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
+ /*CheckReads=*/true, &StoresToSinkSet);
+}
- // Find the load with minimum alignment to use.
- auto *LoadWithMinAlign =
- *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) {
- return cast<LoadInst>(A->getUnderlyingInstr())->getAlign() <
- cast<LoadInst>(B->getUnderlyingInstr())->getAlign();
- });
+void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L) {
+ auto Groups =
+ collectComplementaryPredicatedMemOps<Instruction::Store>(Plan, SE, L);
+ if (Groups.empty())
+ return;
- // Collect common metadata from all loads in the group.
- VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);
-
- // Create an unpredicated load with minimum alignment using the earliest
- // dominating address and common metadata.
- auto *UnpredicatedLoad = new VPReplicateRecipe(
- LoadWithMinAlign->getUnderlyingInstr(), EarliestLoad->getOperand(0),
- /*IsSingleScalar=*/false, /*Mask=*/nullptr, /*Flags=*/{},
- CommonMetadata);
- UnpredicatedLoad->insertBefore(EarliestLoad);
-
- // Replace all loads in the group with the unpredicated load.
- for (VPReplicateRecipe *Load : Group) {
- Load->replaceAllUsesWith(UnpredicatedLoad);
- Load->eraseFromParent();
- }
+ VPDominatorTree VPDT(Plan);
+
+ for (auto &Group : Groups) {
+ sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return VPDT.properlyDominates(A, B);
+ });
+
+ if (!canSinkStoreWithNoAliasCheck(Group))
+ continue;
+
+ // Use the last (most dominated) store's location for the unconditional
+ // store.
+ VPReplicateRecipe *LastStore = Group.back();
+ VPBasicBlock *InsertBB = LastStore->getParent();
+
+ // Collect common alias metadata from all stores in the group.
+ VPIRMetadata CommonMetadata = getCommonMetadata(Group);
+
+ // Build select chain for stored values.
+ VPValue *SelectedValue = Group[0]->getOperand(0);
+ VPBuilder Builder(InsertBB, LastStore->getIterator());
+
+ for (unsigned I = 1; I < Group.size(); ++I) {
+ VPValue *Mask = Group[I]->getMask();
+ VPValue *Value = Group[I]->getOperand(0);
+ SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,
+ Group[I]->getDebugLoc());
}
+
+ // Find the store with minimum alignment to use.
+ auto *StoreWithMinAlign = findRecipeWithMinAlign<StoreInst>(Group);
+
+ // Create unconditional store with selected value and common metadata.
+ auto *UnpredicatedStore =
+ new VPReplicateRecipe(StoreWithMinAlign->getUnderlyingInstr(),
+ {SelectedValue, LastStore->getOperand(1)},
+ /*IsSingleScalar=*/false,
+ /*Mask=*/nullptr, *LastStore, CommonMetadata);
+ UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());
+
+ // Remove all predicated stores from the group.
+ for (VPReplicateRecipe *Store : Group)
+ Store->eraseFromParent();
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index ae3797dee1f07..afdf1655b4622 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -325,6 +325,13 @@ struct VPlanTransforms {
static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
const Loop *L);
+ /// Sink predicated stores to the same address with complementary predicates
+ /// (P and NOT P) to an unconditional store with select recipes for the
+ /// stored values. This eliminates branching overhead when all paths
+ /// unconditionally store to the same location.
+ static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L);
+
// Materialize vector trip counts for constants early if it can simply be
// computed as (Original TC / VF * UF) * VF * UF.
static void
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index 87942911e915f..d695d642cc0a3 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -21,13 +21,12 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]]
@@ -35,39 +34,14 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5)
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10)
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
-; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
-; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
-; CHECK: [[PRED_STORE_IF6]]:
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
-; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
-; CHECK: [[PRED_STORE_CONTINUE7]]:
-; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10)
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
-; CHECK: [[PRED_STORE_IF8]]:
-; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0
-; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
-; CHECK: [[PRED_STORE_CONTINUE9]]:
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
-; CHECK: [[PRED_STORE_IF10]]:
-; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1
-; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
-; CHECK: [[PRED_STORE_CONTINUE11]]:
+; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
+; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
+; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -134,7 +108,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
@@ -162,57 +136,32 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
; CHECK: [[PRED_LOAD_CONTINUE11]]:
; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP18]], splat (i32 5)
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
-; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
-; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
-; CHECK: [[PRED_STORE_IF12]]:
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
-; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META19]], !noalias [[META12]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]]
-; CHECK: [[PRED_STORE_CONTINUE13]]:
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]]
-; CHECK: [[PRED_LOAD_IF14]]:
+; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
+; CHECK: [[PRED_LOAD_IF12]]:
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]]
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
-; CHECK: [[PRED_LOAD_CONTINUE15]]:
-; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE13]] ], [ [[TMP29]], %[[PRED_LOAD_IF14]] ]
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
+; CHECK: [[PRED_LOAD_CONTINUE13]]:
+; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP29]], %[[PRED_LOAD_IF12]] ]
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF16:.*]], label %[[PRED_LOAD_CONTINUE17:.*]]
-; CHECK: [[PRED_LOAD_IF16]]:
+; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]]
+; CHECK: [[PRED_LOAD_IF14]]:
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META15]], !noalias [[META17]]
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE17]]
-; CHECK: [[PRED_LOAD_CONTINUE17]]:
-; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP34]], %[[PRED_LOAD_IF16]] ]
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
+; CHECK: [[PRED_LOAD_CONTINUE15]]:
+; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ]
; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10)
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
-; CHECK: [[PRED_STORE_IF18]]:
-; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0
-; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META19]], !noalias [[META12]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]]
-; CHECK: [[PRED_STORE_CONTINUE19]]:
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21]]
-; CHECK: [[PRED_STORE_IF20]]:
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1
-; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE21]]
-; CHECK: [[PRED_STORE_CONTINUE21]]:
+; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0
+; CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP40]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]]
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP37]], i32 1
+; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -289,13 +238,12 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
@@ -304,7 +252,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_IF19]]:
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]]
; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]]
@@ -317,39 +265,14 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5)
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
-; CHECK: [[PRED_STORE_IF21]]:
+; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0
-; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
-; CHECK: [[PRED_STORE_CONTINUE22]]:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
-; CHECK: [[PRED_STORE_IF23]]:
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1
-; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
-; CHECK: [[PRED_STORE_CONTINUE24]]:
-; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
-; CHECK: [[PRED_STORE_IF25]]:
-; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0
-; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
-; CHECK: [[PRED_STORE_CONTINUE26]]:
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]]
-; CHECK: [[PRED_STORE_IF27]]:
-; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1
-; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
-; CHECK: [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]]
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]]
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP22]], i32 1
+; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
@@ -418,7 +341,7 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
@@ -446,40 +369,31 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
; CHECK: [[PRED_LOAD_CONTINUE11]]:
; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP18]], splat (i32 10)
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
-; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
-; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
-; CHECK: [[PRED_STORE_IF12]]:
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
-; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META42]], !noalias [[META35]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]]
-; CHECK: [[PRED_STORE_CONTINUE13]]:
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
-; CHECK: [[PRED_STORE_IF14]]:
+; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
+; CHECK: [[PRED_LOAD_IF12]]:
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]]
-; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !alias.scope [[META42]], !noalias [[META35]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]]
-; CHECK: [[PRED_STORE_CONTINUE15]]:
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]]
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
+; CHECK: [[PRED_LOAD_CONTINUE13]]:
+; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP23]], %[[PRED_LOAD_IF12]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]]
-; CHECK: [[PRED_STORE_IF16]]:
+; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]]
+; CHECK: [[PRED_LOAD_IF14]]:
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]]
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP33]], align 4, !alias.scope [[META42]], !noalias [[META35]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]]
-; CHECK: [[PRED_STORE_CONTINUE17]]:
+; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]]
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP22]], i32 [[TMP25]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
+; CHECK: [[PRED_LOAD_CONTINUE15]]:
+; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP22]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP32]], %[[PRED_LOAD_IF14]] ]
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP28:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP33]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP28]], i32 0
+; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP36]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]]
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP28]], i32 1
+; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP37]], align 4, !alias.scope [[META42]], !noalias [[META35]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
@@ -559,13 +473,12 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
@@ -574,7 +487,7 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]]
; CHECK: [[PRED_STORE_IF19]]:
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]]
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]]
@@ -587,36 +500,13 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
-; CHECK: [[PRED_STORE_IF21]]:
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0
-; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
-; CHECK: [[PRED_STORE_CONTINUE22]]:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
-; CHECK: [[PRED_STORE_IF23]]:
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1
-; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
-; CHECK: [[PRED_STORE_CONTINUE24]]:
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
-; CHECK: [[PRED_STORE_IF25]]:
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
-; CHECK: [[PRED_STORE_CONTINUE26]]:
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]]
-; CHECK: [[PRED_STORE_IF27]]:
-; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
-; CHECK: [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
+; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]]
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1
+; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]]
@@ -685,45 +575,37 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE7:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
-; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
-; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP15]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
-; CHECK: [[PRED_STORE_IF6]]:
+; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7]]
+; CHECK: [[PRED_LOAD_IF6]]:
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META61]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP16]], align 4, !alias.scope [[META63]], !noalias [[META65]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
-; CHECK: [[PRED_STORE_CONTINUE7]]:
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
-; CHECK: [[PRED_STORE_IF8]]:
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP11]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
+; CHECK: [[PRED_LOAD_CONTINUE7]]:
+; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF6]] ]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
-; CHECK-NEXT: store i32 10, ptr [[TMP18]], align 4, !alias.scope [[META63]], !noalias [[META65]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
-; CHECK: [[PRED_STORE_CONTINUE9]]:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
-; CHECK: [[PRED_STORE_IF10]]:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: store i32 10, ptr [[TMP19]], align 4, !alias.scope [[META63]], !noalias [[META65]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
-; CHECK: [[PRED_STORE_CONTINUE11]]:
+; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP22]], <2 x i32> splat (i32 10)
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
+; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP18]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
+; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP19]], align 4, !alias.scope [[META63]], !noalias [[META65]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP66:![0-9]+]]
>From c61fb460421b656dd71d9c2350a96f3b72e0dfbe Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 1 Dec 2025 15:16:35 +0000
Subject: [PATCH 2/2] !fixup add extra test, remove stray whitespace, thanks
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
...predicated-loads-with-predicated-stores.ll | 121 ++++++++++++++++++
2 files changed, 122 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7bc4eb97b94e3..71548a0991a01 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4295,7 +4295,7 @@ canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
return false;
// When sinking a group of stores, all members of the group alias each other.
- // Skip them during the alias checks.
+ // Skip them during the alias checks.
SmallPtrSet<VPRecipeBase *, 4> StoresToSinkSet(StoresToSink.begin(),
StoresToSink.end());
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index d695d642cc0a3..ae772da8862b3 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -966,3 +966,124 @@ loop.latch:
exit:
ret void
}
+
+; Test with 3 predicated stores to the same address, guarded by different,
+; non-complementary predicates; the stores are expected to stay predicated.
+define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr %cond) {
+; CHECK-LABEL: define void @test_three_stores_with_different_predicates(
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META85:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
+; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
+; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META88:![0-9]+]], !noalias [[META85]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
+; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
+; CHECK: [[PRED_STORE_IF2]]:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4, !alias.scope [[META88]], !noalias [[META85]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
+; CHECK: [[PRED_STORE_CONTINUE3]]:
+; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i1> [[TMP3]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 10)
+; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
+; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
+; CHECK: [[PRED_STORE_IF4]]:
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
+; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META88]], !noalias [[META85]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
+; CHECK: [[PRED_STORE_CONTINUE5]]:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
+; CHECK: [[PRED_STORE_IF6]]:
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: store i32 2, ptr [[TMP15]], align 4, !alias.scope [[META88]], !noalias [[META85]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
+; CHECK: [[PRED_STORE_CONTINUE7]]:
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 9)
+; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
+; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; CHECK: [[PRED_STORE_IF8]]:
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
+; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META88]], !noalias [[META85]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
+; CHECK: [[PRED_STORE_CONTINUE9]]:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1
+; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]]
+; CHECK: [[PRED_STORE_IF10]]:
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: store i32 3, ptr [[TMP21]], align 4, !alias.scope [[META88]], !noalias [[META85]]
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
+; CHECK: [[PRED_STORE_CONTINUE11]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP90:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
+ %c = load i32, ptr %gep.cond, align 4
+ %c.0 = icmp ule i32 %c, 11
+ br i1 %c.0, label %then.0, label %continue.0
+
+then.0:
+ %gep.dst.then.0 = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 1, ptr %gep.dst.then.0, align 4
+ br label %continue.0
+
+continue.0:
+ %c.1 = icmp ule i32 %c, 10
+ br i1 %c.1, label %then.1, label %continue.1
+
+then.1:
+ %gep.dst.then.1 = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 2, ptr %gep.dst.then.1, align 4
+ br label %continue.1
+
+continue.1:
+ %c.2 = icmp ule i32 %c, 9
+ br i1 %c.2, label %then.2, label %loop.latch
+
+then.2:
+ %gep.dst.then.2 = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 3, ptr %gep.dst.then.2, align 4
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
More information about the llvm-commits
mailing list