[llvm] [VPlan] Hoist loads with invariant addresses using noalias metadata. (PR #166247)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 07:34:56 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/166247
>From 2cbe52b039d533b81a0ba270f53c4b1464f15c30 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 3 Nov 2025 22:28:22 +0000
Subject: [PATCH] [VPlan] Hoist loads with invariant addresses using scoped
noalias metadata.
This patch implements a transform to hoists single-scalar replicated
loads with invariant addresses out of the vector loop to the preheader when
scoped noalias metadata proves they cannot alias with any stores in the loop.
This enables hosting of loads we can prove do not alias any stores in
the loop due to memory runtime checks added during vectorization.
---
llvm/include/llvm/Analysis/ScopedNoAliasAA.h | 8 +-
llvm/lib/Analysis/ScopedNoAliasAA.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 8 ++
.../Transforms/Vectorize/VPlanTransforms.cpp | 54 +++++++++++++
.../Transforms/Vectorize/VPlanTransforms.h | 5 ++
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 18 +++++
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 5 ++
.../AArch64/conditional-branches-cost.ll | 16 ++--
.../LoopVectorize/AArch64/store-costs-sve.ll | 16 ++--
.../vf-will-not-generate-any-vector-insts.ll | 6 +-
.../LoopVectorize/X86/cost-model.ll | 75 ++-----------------
.../LoopVectorize/X86/uniform_mem_op.ll | 2 +-
...nd-sink-mem-ops-with-invariant-pointers.ll | 6 +-
.../interleaved-accesses-metadata.ll | 6 +-
.../multiple-strides-vectorization.ll | 16 ++--
.../pointer-select-runtime-checks.ll | 6 +-
llvm/test/Transforms/LoopVectorize/pr50686.ll | 18 ++---
.../LoopVectorize/reduction-align.ll | 6 +-
.../LoopVectorize/runtime-checks-hoist.ll | 10 +--
.../LoopVectorize/single-scalar-cast-minbw.ll | 6 +-
...ive-path-inner-loop-with-runtime-checks.ll | 2 +-
.../AArch64/hoist-load-from-vector-loop.ll | 66 ++++++++++++++--
22 files changed, 220 insertions(+), 139 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index 942cc6f2a4b2b..dbe1afa50ee3a 100644
--- a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
const CallBase *Call2, AAQueryInfo &AAQI);
- LLVM_ABI void
+ LLVM_ABI static void
collectScopedDomains(const MDNode *NoAlias,
- SmallPtrSetImpl<const MDNode *> &Domains) const;
+ SmallPtrSetImpl<const MDNode *> &Domains);
-private:
- bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
+ LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
+ const MDNode *NoAlias);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 4d6c0cc71f898..d24ad0255256c 100644
--- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
/// Collect the set of scoped domains relevant to the noalias scopes.
void ScopedNoAliasAAResult::collectScopedDomains(
- const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
+ const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
if (!NoAlias)
return;
assert(Domains.empty() && "Domains should be empty");
@@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
}
bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
- const MDNode *NoAlias) const {
+ const MDNode *NoAlias) {
if (!Scopes || !NoAlias)
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index cfe1f1e9d7528..9dd8b227dae31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -32,6 +32,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
@@ -965,6 +966,13 @@ class VPIRMetadata {
/// Intersect this VPIRMetada object with \p MD, keeping only metadata
/// nodes that are common to both.
void intersect(const VPIRMetadata &MD);
+
+ /// Get metadata of kind \p Kind. Returns nullptr if not found.
+ MDNode *getMetadata(unsigned Kind) const {
+ auto It = llvm::find_if(Metadata,
+ [Kind](const auto &P) { return P.first == Kind; });
+ return It != Metadata.end() ? It->second : nullptr;
+ }
};
/// This is a concrete Recipe that models a single VPlan-level instruction.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2588c878d8472..7b0e46837e477 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -24,15 +24,20 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -2327,6 +2332,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeDeadRecipes, Plan);
runPass(createAndOptimizeReplicateRegions, Plan);
+ runPass(hoistInvariantLoads, Plan);
runPass(mergeBlocksIntoPredecessors, Plan);
runPass(licm, Plan);
}
@@ -3840,6 +3846,54 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
}
}
+void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+
+ // Collect candidate loads with invariant addresses and noalias scopes
+ // metadata and memory-writing recipes with noalias metadata.
+ SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
+ SmallVector<MemoryLocation> Stores;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(LoopRegion->getEntry()))) {
+ for (VPRecipeBase &R : *VPBB) {
+ // Only handle single-scalar replicated loads with invariant addresses.
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+ if (RepR->isPredicated() || !RepR->isSingleScalar() ||
+ RepR->getOpcode() != Instruction::Load)
+ continue;
+
+ VPValue *Addr = RepR->getOperand(0);
+ if (Addr->isDefinedOutsideLoopRegions()) {
+ MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
+ if (!Loc.AATags.Scope)
+ continue;
+ CandidateLoads.push_back({RepR, Loc});
+ }
+ }
+ if (R.mayWriteToMemory()) {
+ auto Loc = vputils::getMemoryLocation(R);
+ if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
+ return;
+ Stores.push_back(*Loc);
+ }
+ }
+ }
+
+ VPBasicBlock *Preheader = Plan.getVectorPreheader();
+ for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
+ // Hoist the load to the preheader if it doesn't alias with any stores
+ // according to the noalias metadata. Other loads should have been hoisted
+ // by other passes
+ const AAMDNodes &LoadAA = LoadLoc.AATags;
+ if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
+ return !ScopedNoAliasAAResult::mayAliasInScopes(
+ LoadAA.Scope, StoreLoc.AATags.NoAlias);
+ })) {
+ LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
+ }
+ }
+}
+
void VPlanTransforms::materializeConstantVectorTripCount(
VPlan &Plan, ElementCount BestVF, unsigned BestUF,
PredicatedScalarEvolution &PSE) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index b28559b620e13..fae615aa93c71 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -307,6 +307,11 @@ struct VPlanTransforms {
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);
+ /// Hoist single-scalar loads with invariant addresses out of the vector loop
+ /// to the preheader, if they are proven not to alias with any stores in the
+ /// plan using noalias metadata.
+ static void hoistInvariantLoads(VPlan &Plan);
+
// Materialize vector trip counts for constants early if it can simply be
// computed as (Original TC / VF * UF) * VF * UF.
static void
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index c6380d30ab2e2..c13392bcb7e65 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -11,6 +11,7 @@
#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
@@ -376,3 +377,20 @@ bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
return VPB->getNumSuccessors() == 2 &&
VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
}
+
+std::optional<MemoryLocation>
+vputils::getMemoryLocation(const VPRecipeBase &R) {
+ return TypeSwitch<const VPRecipeBase *, std::optional<MemoryLocation>>(&R)
+ .Case<VPWidenStoreRecipe, VPInterleaveBase, VPReplicateRecipe>(
+ [](auto *S) {
+ MemoryLocation Loc;
+ // Populate noalias metadata from VPIRMetadata.
+ if (MDNode *NoAliasMD = S->getMetadata(LLVMContext::MD_noalias))
+ Loc.AATags.NoAlias = NoAliasMD;
+ if (MDNode *AliasScopeMD =
+ S->getMetadata(LLVMContext::MD_alias_scope))
+ Loc.AATags.Scope = AliasScopeMD;
+ return Loc;
+ })
+ .Default([](auto *) { return std::nullopt; });
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index c21a0e70c1392..d8f021be8fb49 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -12,6 +12,7 @@
#include "VPlan.h"
namespace llvm {
+class MemoryLocation;
class ScalarEvolution;
class SCEV;
} // namespace llvm
@@ -71,6 +72,10 @@ std::optional<VPValue *>
getRecipesForUncountableExit(VPlan &Plan,
SmallVectorImpl<VPRecipeBase *> &Recipes,
SmallVectorImpl<VPRecipeBase *> &GEPs);
+
+/// Return a MemoryLocation for \p R with noalias metadata populated from
+/// \p R. The pointer of the location is conservatively set to nullptr.
+std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
} // namespace vputils
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index f16351720b20f..3d33979db6f30 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -386,7 +386,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; DEFAULT-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; DEFAULT-NEXT: [[ENTRY:.*:]]
; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 60
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 28
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; DEFAULT: [[VECTOR_MEMCHECK]]:
; DEFAULT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
@@ -427,20 +427,20 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
-; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
-; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
+; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META8:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
-; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
+; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META13:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT31]]
+; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT29]], [[TMP6]]
+; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
+; DEFAULT: [[VECTOR_BODY]]:
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index 0d8a1021bd438..50807df51c99e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
-; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
-; DEFAULT: vector.body:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8>
+; DEFAULT-NEXT: [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]]
+; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
+; DEFAULT: vector.body:
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16
; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
@@ -156,15 +156,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT4]], <8 x i16> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
-; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
-; DEFAULT: vec.epilog.vector.body:
-; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8>
+; DEFAULT-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[BROADCAST_SPLAT5]] to <8 x i8>
; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]]
+; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; DEFAULT: vec.epilog.vector.body:
+; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]]
; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]]
; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
index ed797fcd6c026..dca4f47738309 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
@@ -17,15 +17,15 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[SRC]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP6]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 725fa49c0930c..9c7ecc25745fe 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -329,83 +329,24 @@ for.end:
define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-LABEL: @multi_exit(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[UMAX6:%.*]] = call i64 @llvm.umax.i64(i64 [[B:%.*]], i64 1)
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[UMAX6]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
-; CHECK-NEXT: [[UMIN7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN7]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 28
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -1
-; CHECK-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
-; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 [[A]])
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[UMIN]] to i32
-; CHECK-NEXT: [[TMP7:%.*]] = add i32 1, [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[UMIN]], 4294967295
-; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
-; CHECK: vector.memcheck:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 1
-; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 8
-; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 8
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1]], [[SCEVGEP]]
-; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
-; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2]], [[SCEVGEP]]
-; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
-; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
-; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 4, i64 [[N_MOD_VF]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP12]]
-; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META6:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP13]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META9:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
-; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
-; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A]]
+; CHECK-NEXT: [[IV_1_WIDE:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_1_NEXT_WIDE:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT: [[EC_1:%.*]] = icmp ult i64 [[IV_1_WIDE]], [[A:%.*]]
; CHECK-NEXT: br i1 [[EC_1]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
; CHECK: loop.latch:
-; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1]], align 8
-; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2]], align 8
+; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[SRC_1:%.*]], align 8
+; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[SRC_2:%.*]], align 8
; CHECK-NEXT: [[CMP55_US:%.*]] = icmp eq i64 [[L_1]], 0
; CHECK-NEXT: [[CMP_I_US:%.*]] = icmp ne i64 [[L_2]], 0
; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP_I_US]], [[CMP55_US]]
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[AND]] to i8
-; CHECK-NEXT: store i8 [[EXT]], ptr [[DST]], align 1
+; CHECK-NEXT: store i8 [[EXT]], ptr [[DST:%.*]], align 1
; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
-; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
-; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B:%.*]]
+; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
index 63f9a1310d15a..dbd7019188d07 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -278,10 +278,10 @@ define void @uniform_copy(ptr %A, ptr %B) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META12:![0-9]+]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META12:![0-9]+]]
; CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
index 8615401af34f8..7bbc186dcbbae 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll
@@ -21,12 +21,12 @@ define void @hoist_invariant_load_noalias_due_to_memchecks(ptr %dst, ptr %invari
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll
index bd0fd77e7c391..9bbd67059e84d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll
@@ -102,6 +102,9 @@ define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[SRC]], align 4, !alias.scope [[META10:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[TMP4]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -109,9 +112,6 @@ define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[SRC]], align 4, !alias.scope [[META10:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META13:![0-9]+]], !noalias [[META10]]
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 1, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
index d21621e46b79c..05cfb1957a766 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
@@ -61,14 +61,14 @@ define void @Test(ptr nocapture %obj, i64 %z) #0 {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[Z]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[Z]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[OBJ]], i64 0, i32 0, i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4, !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP15]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
@@ -125,14 +125,14 @@ define void @Test(ptr nocapture %obj, i64 %z) #0 {
; CHECK-HOIST: vector.ph:
; CHECK-HOIST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[Z]], 4
; CHECK-HOIST-NEXT: [[N_VEC:%.*]] = sub i64 [[Z]], [[N_MOD_VF]]
+; CHECK-HOIST-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-HOIST-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
+; CHECK-HOIST-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-HOIST-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-HOIST: vector.body:
; CHECK-HOIST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-HOIST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[OBJ]], i64 0, i32 0, i64 [[INDEX]]
-; CHECK-HOIST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-HOIST-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META3:![0-9]+]]
-; CHECK-HOIST-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
-; CHECK-HOIST-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-HOIST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]]
; CHECK-HOIST-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[BROADCAST_SPLAT]], [[WIDE_LOAD]]
; CHECK-HOIST-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[OBJ]], i64 0, i32 2, i64 [[I]], i64 [[INDEX]]
; CHECK-HOIST-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
index 4d50a814b621d..f49a2d90b0e84 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
@@ -23,17 +23,17 @@ define void @test1_select_invariant(ptr %src.1, ptr %src.2, ptr %dst, i1 %c, i8
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[PTR_SEL]], align 8, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[INDUCTION2:%.*]] = add i8 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[PTR_SEL]], align 8, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i8 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i8 [[INDUCTION2]]
-; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP11]], align 2, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
-; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP8]], align 2, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT: store i8 [[TMP6]], ptr [[TMP11]], align 2, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: store i8 [[TMP6]], ptr [[TMP8]], align 2, !alias.scope [[META3]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll
index 878fbec452220..7df6566f7bab0 100644
--- a/llvm/test/Transforms/LoopVectorize/pr50686.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll
@@ -15,23 +15,23 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]]
+; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT5]]
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP6]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT: [[TMP9:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-align.ll b/llvm/test/Transforms/LoopVectorize/reduction-align.ll
index 028eb3b05957d..0c45b96874da2 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-align.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-align.ll
@@ -23,13 +23,13 @@ define void @fn(ptr %hbuf, ptr %ref, i32 %height) {
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[HEIGHT]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[HEIGHT]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[REF]], align 1, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[REF]], align 1, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1]] = add <4 x i16> [[BROADCAST_SPLAT]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
index a1329598529fd..25f40be238338 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
@@ -1511,14 +1511,14 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) {
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP0]], align 8, !alias.scope [[META72:![0-9]+]], !noalias [[META69]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INDEX]]
+; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8, !alias.scope [[META72:![0-9]+]], !noalias [[META69]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
index 70adac2103feb..fb25b2bc7b906 100644
--- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
@@ -80,13 +80,13 @@ define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[TMP0]], 0
; CHECK-NEXT: [[TMP4:%.*]] = and i16 [[TMP0]], 15
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll
index e5e02674704f9..22cf860c8b58c 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll
@@ -50,6 +50,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[SRC]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
@@ -58,7 +59,6 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[SRC]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP20:%.*]] = shl <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP20]], i32 0
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP20]], i32 1
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll
index a35bcf1c5a88d..c17b15138329c 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll
@@ -8,19 +8,69 @@ target triple = "arm64-apple-macosx"
define void @hoist_invariant_load(ptr %invariant_ptr, i64 %num_elements, ptr %array) {
; CHECK-LABEL: define void @hoist_invariant_load(
; CHECK-SAME: ptr readonly captures(none) [[INVARIANT_PTR:%.*]], i64 [[NUM_ELEMENTS:%.*]], ptr captures(none) [[ARRAY:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i64 [[NUM_ELEMENTS]], 0
-; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH:.*]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[CMP1_NOT]], label %[[EXIT:.*]], label %[[LOOP_LATCH_PREHEADER:.*]]
+; CHECK: [[LOOP_LATCH_PREHEADER]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUM_ELEMENTS]], 11
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[LOOP_LATCH_PREHEADER6:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[NUM_ELEMENTS]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[ARRAY]], i64 [[TMP0]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -24
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[INVARIANT_PTR]], i64 8
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARRAY]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[INVARIANT_PTR]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[LOOP_LATCH_PREHEADER6]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[NUM_ELEMENTS]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_ELEMENTS]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[INVARIANT_PTR]], align 8, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[I2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 32
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 64
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I2]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 96
+; CHECK-NEXT: [[TMP12:%.*]] = load <5 x double>, ptr [[GEP]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <5 x double> [[TMP12]], <5 x double> poison, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT: [[TMP13:%.*]] = load <5 x double>, ptr [[TMP9]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <5 x double> [[TMP13]], <5 x double> poison, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT: [[TMP14:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], [[STRIDED_VEC]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[TMP14]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[TMP14]], i64 1
+; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], [[STRIDED_VEC5]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[TMP17]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP17]], i64 1
+; CHECK-NEXT: store double [[TMP15]], ptr [[GEP]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT: store double [[TMP16]], ptr [[TMP7]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT: store double [[TMP18]], ptr [[TMP9]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT: store double [[TMP19]], ptr [[TMP11]], align 8, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[I2]], 4
+; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[LOOP_LATCH_PREHEADER6]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[LOOP_LATCH_PREHEADER6]]:
+; CHECK-NEXT: [[I2_PH:%.*]] = phi i64 [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_LATCH_PREHEADER]] ], [ [[N_VEC]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: br label %[[LOOP_LATCH:.*]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[I3:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP_LATCH]] ], [ [[I2_PH]], %[[LOOP_LATCH_PREHEADER6]] ]
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nusw %"class.dealii::VectorizedArray", ptr [[ARRAY]], i64 [[I3]]
; CHECK-NEXT: [[INVARIANT_VAL:%.*]] = load double, ptr [[INVARIANT_PTR]], align 8
-; CHECK-NEXT: [[ARRAY_VAL:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT: [[ARRAY_VAL:%.*]] = load double, ptr [[GEP1]], align 8
; CHECK-NEXT: [[SUM:%.*]] = fadd double [[INVARIANT_VAL]], [[ARRAY_VAL]]
-; CHECK-NEXT: store double [[SUM]], ptr [[GEP]], align 8
-; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I2]], 1
+; CHECK-NEXT: store double [[SUM]], ptr [[GEP1]], align 8
+; CHECK-NEXT: [[I_NEXT]] = add nuw i64 [[I3]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[NUM_ELEMENTS]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_LATCH]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list