[llvm] [VPlan] Hoist predicated loads with replicate recipes (PR #168373)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 17 06:15:23 PST 2025


llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

This patch adds a new VPlan transformation to hoist predicated loads when
we can prove they execute unconditionally, i.e. when there are two
predicated loads from the same address with complementary masks. In that
case, one of the loads is guaranteed to execute on each iteration,
allowing us to remove the mask.
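
For illustration, a minimal sketch (block and value names invented) of the kind of source loop this targets: both branches load from the same address `%p` under complementary conditions, so exactly one of the two loads executes on every iteration.

```llvm
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %c = icmp ult i64 %iv, %k
  br i1 %c, label %then, label %else

then:                                      ; executes under mask %c
  %v0 = load i32, ptr %p, align 4
  br label %latch

else:                                      ; executes under mask (not %c)
  %v1 = load i32, ptr %p, align 4
  br label %latch

latch:
  %v = phi i32 [ %v0, %then ], [ %v1, %else ]
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop
```

After predication and scalarization, the two loads become masked VPReplicateRecipes with masks `%c` and `not %c`, which is the pattern the transform looks for.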

The transform groups masked replicating loads by their address SCEV, then
checks whether a group contains two loads with complementary masks. If so,
we check whether any writes in the blocks between the first and last load
of the group may alias the load address. The transform operates after
linearizing the CFG, but before introducing replicate regions, so this
only needs to check a chain of consecutive blocks.

Currently this only uses noalias metadata to prove non-aliasing (using
the helpers added in https://github.com/llvm/llvm-project/pull/166247).
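
As a concrete sketch of what that check relies on (scope metadata IDs invented here): the loads carry `!alias.scope` and intervening stores carry `!noalias`; when every scope the load may access appears in the store's noalias list, `ScopedNoAliasAAResult::mayAliasInScopes` returns false and the store does not block hoisting.

```llvm
; The group's loads access memory in scope list !2; the store between them
; is known not to alias scope list !2, so the scoped-noalias query proves
; NoAlias and the first load can be made unconditional.
%v0 = load i32, ptr %p, align 4, !alias.scope !2
store i32 %x, ptr %q, align 4, !noalias !2
%v1 = load i32, ptr %p, align 4, !alias.scope !2

!0 = distinct !{!0}       ; alias domain
!1 = distinct !{!1, !0}   ; scope: the memory %p points to
!2 = !{!1}                ; scope list containing !1
```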

Finally, we create an unpredicated VPReplicateRecipe at the position of
the first load and replace all users of the grouped loads with it, as
sketched below.
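
Schematically, at the VPlan level (notation approximate, names invented):

```
; Before: two predicated replicate loads of the same address, with
; complementary masks %m and (not %m).
REPLICATE ir<%v0> = load ir<%p>, ir<%m>
...
EMIT vp<%not.m> = not ir<%m>
REPLICATE ir<%v1> = load ir<%p>, vp<%not.m>

; After: a single unpredicated replicate load at the position of the first,
; carrying the intersection of both loads' metadata; all users of %v0 and
; %v1 use it, and the masked loads are erased.
REPLICATE ir<%v> = load ir<%p>
```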

---

Patch is 107.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168373.diff


25 Files Affected:

- (modified) llvm/include/llvm/Analysis/ScopedNoAliasAA.h (+4-4) 
- (modified) llvm/lib/Analysis/ScopedNoAliasAA.cpp (+2-2) 
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+8) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+199) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+11) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.cpp (+18) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.h (+5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+8-67) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll (+77-133) 
- (modified) llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll (+24-228) 
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/pr50686.ll (+6-6) 
- (modified) llvm/test/Transforms/LoopVectorize/reduction-align.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll (+3-3) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-native-path-inner-loop-with-runtime-checks.ll (+1-1) 
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/hoist-load-from-vector-loop.ll (+58-8) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index 942cc6f2a4b2b..dbe1afa50ee3a 100644
--- a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -46,12 +46,12 @@ class ScopedNoAliasAAResult : public AAResultBase {
   LLVM_ABI ModRefInfo getModRefInfo(const CallBase *Call1,
                                     const CallBase *Call2, AAQueryInfo &AAQI);
 
-  LLVM_ABI void
+  LLVM_ABI static void
   collectScopedDomains(const MDNode *NoAlias,
-                       SmallPtrSetImpl<const MDNode *> &Domains) const;
+                       SmallPtrSetImpl<const MDNode *> &Domains);
 
-private:
-  bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
+  LLVM_ABI static bool mayAliasInScopes(const MDNode *Scopes,
+                                        const MDNode *NoAlias);
 };
 
 /// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 4d6c0cc71f898..d24ad0255256c 100644
--- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -116,7 +116,7 @@ static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
 
 /// Collect the set of scoped domains relevant to the noalias scopes.
 void ScopedNoAliasAAResult::collectScopedDomains(
-    const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) const {
+    const MDNode *NoAlias, SmallPtrSetImpl<const MDNode *> &Domains) {
   if (!NoAlias)
     return;
   assert(Domains.empty() && "Domains should be empty");
@@ -127,7 +127,7 @@ void ScopedNoAliasAAResult::collectScopedDomains(
 }
 
 bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
-                                             const MDNode *NoAlias) const {
+                                             const MDNode *NoAlias) {
   if (!Scopes || !NoAlias)
     return true;
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cbfbc29360b0b..c4e362cccd80b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8301,6 +8301,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
     if (auto Plan = tryToBuildVPlanWithVPRecipes(
             std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
       // Now optimize the initial VPlan.
+      VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop);
       VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                *Plan, CM.getMinimalBitwidths());
       VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 13131a2b61722..841bbd2c22406 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -32,6 +32,7 @@
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/FMF.h"
@@ -983,6 +984,13 @@ class VPIRMetadata {
   /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
   /// nodes that are common to both.
   void intersect(const VPIRMetadata &MD);
+
+  /// Get metadata of kind \p Kind. Returns nullptr if not found.
+  MDNode *getMetadata(unsigned Kind) const {
+    auto It = llvm::find_if(Metadata,
+                            [Kind](const auto &P) { return P.first == Kind; });
+    return It != Metadata.end() ? It->second : nullptr;
+  }
 };
 
 /// This is a concrete Recipe that models a single VPlan-level instruction.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9bb61308cb7d9..ae032692852a9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -24,15 +24,20 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/TypeSize.h"
 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -2389,6 +2394,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   runPass(removeDeadRecipes, Plan);
 
   runPass(createAndOptimizeReplicateRegions, Plan);
+  runPass(hoistInvariantLoads, Plan);
   runPass(mergeBlocksIntoPredecessors, Plan);
   runPass(licm, Plan);
 }
@@ -3904,6 +3910,199 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
   }
 }
 
+void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+
+  // Collect candidate loads with invariant addresses and noalias scopes
+  // metadata and memory-writing recipes with noalias metadata.
+  SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
+  SmallVector<MemoryLocation> Stores;
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(LoopRegion->getEntry()))) {
+    for (VPRecipeBase &R : *VPBB) {
+      // Only handle single-scalar replicated loads with invariant addresses.
+      if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+        if (RepR->isPredicated() || !RepR->isSingleScalar() ||
+            RepR->getOpcode() != Instruction::Load)
+          continue;
+
+        VPValue *Addr = RepR->getOperand(0);
+        if (Addr->isDefinedOutsideLoopRegions()) {
+          MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
+          if (!Loc.AATags.Scope)
+            continue;
+          CandidateLoads.push_back({RepR, Loc});
+        }
+      }
+      if (R.mayWriteToMemory()) {
+        auto Loc = vputils::getMemoryLocation(R);
+        if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
+          return;
+        Stores.push_back(*Loc);
+      }
+    }
+  }
+
+  VPBasicBlock *Preheader = Plan.getVectorPreheader();
+  for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
+    // Hoist the load to the preheader if it doesn't alias with any stores
+    // according to the noalias metadata. Other loads should have been hoisted
+    // by other passes.
+    const AAMDNodes &LoadAA = LoadLoc.AATags;
+    if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
+          return !ScopedNoAliasAAResult::mayAliasInScopes(
+              LoadAA.Scope, StoreLoc.AATags.NoAlias);
+        })) {
+      LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
+    }
+  }
+}
+
+// Returns the intersection of metadata from a group of loads.
+static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
+  VPIRMetadata CommonMetadata = *Loads.front();
+  for (VPReplicateRecipe *Load : drop_begin(Loads))
+    CommonMetadata.intersect(*Load);
+  return CommonMetadata;
+}
+
+// Check if a load can be hoisted by verifying it doesn't alias with any stores
+// in blocks between FirstBB and LastBB using scoped noalias metadata.
+static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
+                                         VPBasicBlock *FirstBB,
+                                         VPBasicBlock *LastBB) {
+  // Get the load's memory location and check if it aliases with any stores
+  // using scoped noalias metadata.
+  auto LoadLoc = vputils::getMemoryLocation(*Load);
+  if (!LoadLoc || !LoadLoc->AATags.Scope)
+    return false;
+
+  const AAMDNodes &LoadAA = LoadLoc->AATags;
+  for (VPBlockBase *Block = FirstBB; Block;
+       Block = Block->getSingleSuccessor()) {
+    // This function assumes a simple linear chain of blocks. If there are
+    // multiple successors, we would need more complex analysis.
+    assert(Block->getNumSuccessors() <= 1 &&
+           "Expected at most one successor in block chain");
+    auto *VPBB = cast<VPBasicBlock>(Block);
+    for (VPRecipeBase &R : *VPBB) {
+      if (R.mayWriteToMemory()) {
+        auto Loc = vputils::getMemoryLocation(R);
+        // Bail out if we can't get the location or if the scoped noalias
+        // metadata indicates potential aliasing.
+        if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
+                        LoadAA.Scope, Loc->AATags.NoAlias))
+          return false;
+      }
+    }
+
+    if (Block == LastBB)
+      break;
+  }
+  return true;
+}
+
+void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
+                                           const Loop *L) {
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  VPTypeAnalysis TypeInfo(Plan);
+  VPDominatorTree VPDT(Plan);
+
+  // Group predicated loads by their address SCEV.
+  MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
+  for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
+    auto *VPBB = cast<VPBasicBlock>(Block);
+    for (VPRecipeBase &R : *VPBB) {
+      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+      if (!RepR || RepR->getOpcode() != Instruction::Load ||
+          !RepR->isPredicated())
+        continue;
+
+      VPValue *Addr = RepR->getOperand(0);
+      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
+      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
+        LoadsByAddress[AddrSCEV].push_back(RepR);
+    }
+  }
+
+  // For each address, collect loads with complementary masks, sort by
+  // dominance, and use the earliest load.
+  for (auto &[Addr, Loads] : LoadsByAddress) {
+    if (Loads.size() < 2)
+      continue;
+
+    // Collect groups of loads with complementary masks.
+    SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
+    for (VPReplicateRecipe *&LoadI : Loads) {
+      if (!LoadI)
+        continue;
+
+      VPValue *MaskI = LoadI->getMask();
+      Type *TypeI = TypeInfo.inferScalarType(LoadI);
+      SmallVector<VPReplicateRecipe *, 4> Group;
+      Group.push_back(LoadI);
+      LoadI = nullptr;
+
+      // Find all loads with the same type.
+      for (VPReplicateRecipe *&LoadJ : Loads) {
+        if (!LoadJ)
+          continue;
+
+        Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
+        if (TypeI == TypeJ) {
+          Group.push_back(LoadJ);
+          LoadJ = nullptr;
+        }
+      }
+
+      // Check if any load in the group has a complementary mask with another,
+      // that is M1 == NOT(M2) or M2 == NOT(M1).
+      bool HasComplementaryMask =
+          any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
+            VPValue *MaskJ = Load->getMask();
+            return match(MaskI, m_Not(m_Specific(MaskJ))) ||
+                   match(MaskJ, m_Not(m_Specific(MaskI)));
+          });
+
+      if (HasComplementaryMask)
+        LoadGroups.push_back(std::move(Group));
+    }
+
+    // For each group, check memory dependencies and hoist the earliest load.
+    for (auto &Group : LoadGroups) {
+      // Sort loads by dominance order, with earliest (most dominating) first.
+      sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+        return VPDT.properlyDominates(A, B);
+      });
+
+      VPReplicateRecipe *EarliestLoad = Group.front();
+      VPBasicBlock *FirstBB = EarliestLoad->getParent();
+      VPBasicBlock *LastBB = Group.back()->getParent();
+
+      // Check that the load doesn't alias with stores between first and last.
+      if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
+        continue;
+
+      // Collect common metadata from all loads in the group.
+      VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);
+
+      // Create an unpredicated version of the earliest load with common
+      // metadata.
+      auto *UnpredicatedLoad = new VPReplicateRecipe(
+          EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
+          /*IsSingleScalar=*/false, /*Mask=*/nullptr, CommonMetadata);
+
+      UnpredicatedLoad->insertBefore(EarliestLoad);
+
+      // Replace all loads in the group with the unpredicated load.
+      for (VPReplicateRecipe *Load : Group) {
+        Load->replaceAllUsesWith(UnpredicatedLoad);
+        Load->eraseFromParent();
+      }
+    }
+  }
+}
+
 void VPlanTransforms::materializeConstantVectorTripCount(
     VPlan &Plan, ElementCount BestVF, unsigned BestUF,
     PredicatedScalarEvolution &PSE) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e3bde8a47dcbc..62f89694cd309 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -308,6 +308,17 @@ struct VPlanTransforms {
   /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
   static void materializeBroadcasts(VPlan &Plan);
 
+  /// Hoist single-scalar loads with invariant addresses out of the vector loop
+  /// to the preheader, if they are proven not to alias with any stores in the
+  /// plan using noalias metadata.
+  static void hoistInvariantLoads(VPlan &Plan);
+
+  /// Hoist predicated loads from the same address to the loop entry block, if
+  /// they are guaranteed to execute on both paths (i.e., in replicate regions
+  /// with complementary masks P and NOT P).
+  static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
+                                   const Loop *L);
+
   // Materialize vector trip counts for constants early if it can simply be
   // computed as (Original TC / VF * UF) * VF * UF.
   static void
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 3bc2dfd623777..9efdd16777b11 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -11,6 +11,7 @@
 #include "VPlanDominatorTree.h"
 #include "VPlanPatternMatch.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 
 using namespace llvm;
@@ -393,3 +394,20 @@ bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
   return VPB->getNumSuccessors() == 2 &&
          VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
 }
+
+std::optional<MemoryLocation>
+vputils::getMemoryLocation(const VPRecipeBase &R) {
+  return TypeSwitch<const VPRecipeBase *, std::optional<MemoryLocation>>(&R)
+      .Case<VPWidenStoreRecipe, VPInterleaveBase, VPReplicateRecipe>(
+          [](auto *S) {
+            MemoryLocation Loc;
+            // Populate noalias metadata from VPIRMetadata.
+            if (MDNode *NoAliasMD = S->getMetadata(LLVMContext::MD_noalias))
+              Loc.AATags.NoAlias = NoAliasMD;
+            if (MDNode *AliasScopeMD =
+                    S->getMetadata(LLVMContext::MD_alias_scope))
+              Loc.AATags.Scope = AliasScopeMD;
+            return Loc;
+          })
+      .Default([](auto *) { return std::nullopt; });
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index df1613d760a04..9efd61c310542 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -13,6 +13,7 @@
 #include "llvm/Support/Compiler.h"
 
 namespace llvm {
+class MemoryLocation;
 class ScalarEvolution;
 class SCEV;
 } // namespace llvm
@@ -73,6 +74,10 @@ std::optional<VPValue *>
 getRecipesForUncountableExit(VPlan &Plan,
                              SmallVectorImpl<VPRecipeBase *> &Recipes,
                              SmallVectorImpl<VPRecipeBase *> &GEPs);
+
+/// Return a MemoryLocation for \p R with noalias metadata populated from
+/// \p R. The pointer of the location is conservatively set to nullptr.
+std::optional<MemoryLocation> getMemoryLocation(const VPRecipeBase &R);
 } // namespace vputils
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 2f7e3568d5654..e1116711fe6b9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -386,7 +386,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
 ; DEFAULT-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
 ; DEFAULT-NEXT:  [[ENTRY:.*:]]
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
-; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 60
+; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 28
 ; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; DEFAULT:       [[VECTOR_MEMCHECK]]:
 ; DEFAULT-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
@@ -427,16 +427,16 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
 ; DEFAULT:       [[VECTOR_PH]]:
 ; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
 ; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
-; DEFAULT:       [[VECTOR_BODY]]:
-; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
-; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
+; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META8:![0-9]+]]
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
+; DEFAULT-NEXT:    [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META13:![0-9]+]]
 ; DEFAULT-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]]
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    br label %[[VECTOR_BODY:.*]]
+; DEFAULT:       [[VECTOR_BODY]]:
+; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
 ; DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
 ; DEFAULT-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; DEFAULT:       [[PRED_STORE_IF]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
index 0d8a1021bd438..50807df51c99e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll
@@ -132,15 +132,15 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
 ; DEFAULT:       vector.ph:
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[X]], i64 0
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
-; DEFAULT-NEXT:    [[TMP0:%.*]] = trunc <16 x i16> [[BROADCAST_SPLAT]] to <16 x i8>
-; DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
-; DEFAULT:       vector.body:
-; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]]
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poi...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/168373

