[llvm] [LV]Enable max safe distance in predicated DataWithEVL vectorization mode. (PR #100755)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 26 07:39:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-analysis

Author: Alexey Bataev (alexey-bataev)

<details>
<summary>Changes</summary>

DataWithEVL tail-folded loops still use scalable vectorization, with
a special check for the max safe distance, which makes it possible to
support non-power-of-2 distances.
The patch performs extra analysis of the max store-load forwarding distance
to allow non-power-of-2 distances. As a result, it has to relax some
checks, because the compiler does not yet know how the loop will be
vectorized (in EVL-predicated mode or not).
This change required introducing a disableTailFolding() function, which
resets a previously selected tail-folding style to None once it is known
that the trip count modulo VF is zero.


---

Patch is 59.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100755.diff


16 Files Affected:

- (modified) llvm/include/llvm/Analysis/LoopAccessAnalysis.h (+25) 
- (modified) llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h (+12) 
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+45-10) 
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+57-18) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+5) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+8-3) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+3-1) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll (+2-2) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll (+2-3) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll (+2-3) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/num-iters-for-store-load-conflict.ll (+16-24) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/pr64637.ll (+2-3) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll (+2-3) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll (+169-56) 
- (modified) llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll (+7-18) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index cc40d2e83f2e0..b661e117d01ee 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -37,6 +37,8 @@ class Value;
 struct VectorizerParams {
   /// Maximum SIMD width.
   static const unsigned MaxVectorWidth;
+  /// Maximum LMUL factor.
+  static const unsigned MaxVectorLMUL;
 
   /// VF as overridden by the user.
   static unsigned VectorizationFactor;
@@ -222,6 +224,23 @@ class MemoryDepChecker {
     return MaxSafeVectorWidthInBits;
   }
 
+  /// Return safe power-of-2 number of elements, which do not prevent store-load
+  /// forwarding.
+  std::optional<uint64_t> getStoreLoadForwardSafeVFPowerOf2() const {
+    if (MaxStoreLoadForwardSafeVF.first == std::numeric_limits<uint64_t>::max())
+      return std::nullopt;
+    return MaxStoreLoadForwardSafeVF.first;
+  }
+
+  /// Return safe non-power-of-2 number of elements, which do not prevent
+  /// store-load forwarding.
+  std::optional<uint64_t> getStoreLoadForwardSafeVFNonPowerOf2() const {
+    if (MaxStoreLoadForwardSafeVF.second ==
+        std::numeric_limits<uint64_t>::max())
+      return std::nullopt;
+    return MaxStoreLoadForwardSafeVF.second;
+  }
+
   /// In same cases when the dependency check fails we can still
   /// vectorize the loop with a dynamic array access check.
   bool shouldRetryWithRuntimeCheck() const {
@@ -310,6 +329,12 @@ class MemoryDepChecker {
   /// restrictive.
   uint64_t MaxSafeVectorWidthInBits = -1U;
 
+  /// Maximum number of elements (power-of-2 and non-power-of-2), which do not
+  /// prevent store-load forwarding.
+  std::pair<uint64_t, uint64_t> MaxStoreLoadForwardSafeVF =
+      std::make_pair(std::numeric_limits<uint64_t>::max(),
+                     std::numeric_limits<uint64_t>::max());
+
   /// If we see a non-constant dependence distance we can still try to
   /// vectorize this loop with runtime checks.
   bool FoundNonConstantDistanceDependence = false;
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 0f4d1355dd2bf..c16a5f9a1344c 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -377,6 +377,18 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
+  /// Return safe power-of-2 number of elements, which do not prevent store-load
+  /// forwarding.
+  std::optional<unsigned> getMaxStoreLoadForwardSafeVFPowerOf2() const {
+    return LAI->getDepChecker().getStoreLoadForwardSafeVFPowerOf2();
+  }
+
+  /// Return safe non-power-of-2 number of elements, which do not prevent
+  /// store-load forwarding.
+  std::optional<unsigned> getMaxStoreLoadForwardSafeVFNonPowerOf2() const {
+    return LAI->getDepChecker().getStoreLoadForwardSafeVFNonPowerOf2();
+  }
+
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
   bool isMaskRequired(const Instruction *I) const {
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 646d2f7ef3077..29816bd1d845c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -100,6 +100,8 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
 
 /// Maximum SIMD width.
 const unsigned VectorizerParams::MaxVectorWidth = 64;
+/// Maximum LMUL factor.
+const unsigned VectorizerParams::MaxVectorLMUL = 8;
 
 /// We collect dependences up to this threshold.
 static cl::opt<unsigned>
@@ -1764,31 +1766,64 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
   // cause any slowdowns.
   const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
   // Maximum vector factor.
-  uint64_t MaxVFWithoutSLForwardIssues = std::min(
-      VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
+  uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
+      std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
+               MaxStoreLoadForwardSafeVF.first);
+  uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 =
+      std::min(VectorizerParams::MaxVectorLMUL *
+                   VectorizerParams::MaxVectorWidth * TypeByteSize,
+               MaxStoreLoadForwardSafeVF.second);
 
   // Compute the smallest VF at which the store and load would be misaligned.
-  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
-       VF *= 2) {
+  for (uint64_t VF = 2 * TypeByteSize;
+       VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) {
     // If the number of vector iteration between the store and the load are
     // small we could incur conflicts.
     if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
-      MaxVFWithoutSLForwardIssues = (VF >> 1);
+      MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1);
+      break;
+    }
+  }
+  // RISCV VLA supports non-power-2 vector factor. So, we iterate in a
+  // backward order to find largest VF, which allows aligned stores-loads or
+  // the number of iterations between conflicting memory addresses is not less
+  // than 8 (NumItersForStoreLoadThroughMemory).
+  for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2,
+                E = 2 * TypeByteSize;
+       VF >= E; VF -= TypeByteSize) {
+    if (Distance % VF == 0 ||
+        Distance / VF >= NumItersForStoreLoadThroughMemory) {
+      uint64_t GCD = MaxStoreLoadForwardSafeVF.second ==
+                             std::numeric_limits<uint64_t>::max()
+                         ? VF
+                         : std::gcd(MaxStoreLoadForwardSafeVF.second, VF);
+      MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD;
       break;
     }
   }
 
-  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
+  if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize &&
+      MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) {
     LLVM_DEBUG(
         dbgs() << "LAA: Distance " << Distance
                << " that could cause a store-load forwarding conflict\n");
     return true;
   }
 
-  if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
-      MaxVFWithoutSLForwardIssues !=
-          VectorizerParams::MaxVectorWidth * TypeByteSize)
-    MinDepDistBytes = MaxVFWithoutSLForwardIssues;
+  if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize)
+    MaxStoreLoadForwardSafeVF.first = 1;
+  else if (MaxVFWithoutSLForwardIssuesPowerOf2 <
+               MaxStoreLoadForwardSafeVF.first &&
+           MaxVFWithoutSLForwardIssuesPowerOf2 !=
+               VectorizerParams::MaxVectorWidth * TypeByteSize)
+    MaxStoreLoadForwardSafeVF.first = MaxVFWithoutSLForwardIssuesPowerOf2;
+  if (MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize)
+    MaxStoreLoadForwardSafeVF.second = 1;
+  else if (MaxVFWithoutSLForwardIssuesNonPowerOf2 <
+               MaxStoreLoadForwardSafeVF.second &&
+           MaxVFWithoutSLForwardIssuesNonPowerOf2 !=
+               VectorizerParams::MaxVectorWidth * TypeByteSize)
+    MaxStoreLoadForwardSafeVF.second = MaxVFWithoutSLForwardIssuesNonPowerOf2;
   return false;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09ca859f52680..28e814e9c89e9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1444,9 +1444,8 @@ class LoopVectorizationCostModel {
 
   /// Selects and saves TailFoldingStyle for 2 options - if IV update may
   /// overflow or not.
-  /// \param IsScalableVF true if scalable vector factors enabled.
   /// \param UserIC User specific interleave count.
-  void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
+  void setTailFoldingStyles(unsigned UserIC) {
     assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
     if (!Legal->canFoldTailByMasking()) {
       ChosenTailFoldingStyle =
@@ -1470,11 +1469,9 @@ class LoopVectorizationCostModel {
     // FIXME: use actual opcode/data type for analysis here.
     // FIXME: Investigate opportunity for fixed vector factor.
     bool EVLIsLegal =
-        IsScalableVF && UserIC <= 1 &&
+        UserIC <= 1 &&
         TTI.hasActiveVectorLength(0, nullptr, Align()) &&
-        !EnableVPlanNativePath &&
-        // FIXME: implement support for max safe dependency distance.
-        Legal->isSafeForAnyVectorWidth();
+        !EnableVPlanNativePath;
     if (!EVLIsLegal) {
       // If for some reason EVL mode is unsupported, fallback to
       // DataWithoutLaneMask to try to vectorize the loop with folded tail
@@ -1492,6 +1489,14 @@ class LoopVectorizationCostModel {
     }
   }
 
+  /// Disables previously chosen tail folding policy, sets it to None. Expects,
+  /// that the tail policy was selected.
+  void disableTailFolding() {
+    assert(ChosenTailFoldingStyle && "Tail folding must be selected.");
+    ChosenTailFoldingStyle =
+        std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None);
+  }
+
   /// Returns true if all loop blocks should be masked to fold tail loop.
   bool foldTailByMasking() const {
     // TODO: check if it is possible to check for None style independent of
@@ -1499,6 +1504,14 @@ class LoopVectorizationCostModel {
     return getTailFoldingStyle() != TailFoldingStyle::None;
   }
 
+  /// Return maximum safe number of elements to be processed, which do not
+  /// prevent store-load forwarding.
+  /// TODO: need to consider adjusting cost model to use this value as a
+  /// vectorization factor for EVL-based vectorization.
+  std::optional<unsigned> getMaxEVLSafeElements() const {
+    return MaxEVLSafeElements;
+  }
+
   /// Returns true if the instructions in this block requires predication
   /// for any reason, e.g. because tail folding now requires a predicate
   /// or because the block in the original loop was predicated.
@@ -1654,6 +1667,10 @@ class LoopVectorizationCostModel {
   /// true if scalable vectorization is supported and enabled.
   std::optional<bool> IsScalableVectorizationAllowed;
 
+  /// Maximum safe number of elements to be processed, which do not
+  /// prevent store-load forwarding.
+  std::optional<unsigned> MaxEVLSafeElements;
+
   /// A map holding scalar costs for different vectorization factors. The
   /// presence of a cost for an instruction in the mapping indicates that the
   /// instruction will be scalarized when vectorizing with the associated
@@ -3903,11 +3920,31 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
   // It is computed by MaxVF * sizeOf(type) * 8, where type is taken from
   // the memory accesses that is most restrictive (involved in the smallest
   // dependence distance).
-  unsigned MaxSafeElements =
-      llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
+  unsigned MaxSafeElements = Legal->getMaxSafeVectorWidthInBits() / WidestType;
+  if (Legal->isSafeForAnyVectorWidth())
+    MaxSafeElements = PowerOf2Ceil(MaxSafeElements);
+  unsigned MaxFixedSafeElements = std::gcd(
+      MaxSafeElements,
+      Legal->getMaxStoreLoadForwardSafeVFPowerOf2().value_or(MaxSafeElements));
+  MaxFixedSafeElements = bit_floor(MaxFixedSafeElements);
+  unsigned MaxScalableSafeElements = MaxFixedSafeElements;
+  if (foldTailWithEVL()) {
+    MaxScalableSafeElements = std::numeric_limits<unsigned>::max();
+    std::optional<unsigned> SafeStoreLoadForwarding =
+        Legal->getMaxStoreLoadForwardSafeVFNonPowerOf2();
+    if (!Legal->isSafeForAnyVectorWidth() || SafeStoreLoadForwarding) {
+      unsigned SLForwardDist =
+          Legal->getMaxStoreLoadForwardSafeVFNonPowerOf2().value_or(
+              MaxSafeElements);
+      if (MaxSafeElements >= SLForwardDist)
+        MaxEVLSafeElements = SLForwardDist;
+      else
+        MaxEVLSafeElements = std::gcd(MaxSafeElements, SLForwardDist);
+    }
+  }
 
-  auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
-  auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
+  auto MaxSafeFixedVF = ElementCount::getFixed(MaxFixedSafeElements);
+  auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxScalableSafeElements);
 
   LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
                     << ".\n");
@@ -4077,7 +4114,13 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
-  FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(MaxTC, UserVF, true);
+  // If we don't know the precise trip count, or if the trip count that we
+  // found modulo the vectorization factor is not zero, try to fold the tail
+  // by masking.
+  // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
+  setTailFoldingStyles(UserIC);
+  FixedScalableVFPair MaxFactors =
+      computeFeasibleMaxVF(MaxTC, UserVF, foldTailByMasking());
 
   // Avoid tail folding if the trip count is known to be a multiple of any VF
   // we choose.
@@ -4108,15 +4151,11 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     if (Rem->isZero()) {
       // Accept MaxFixedVF if we do not have a tail.
       LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n");
+      disableTailFolding();
       return MaxFactors;
     }
   }
 
-  // If we don't know the precise trip count, or if the trip count that we
-  // found modulo the vectorization factor is not zero, try to fold the tail
-  // by masking.
-  // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
-  setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), UserIC);
   if (foldTailByMasking()) {
     if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) {
       LLVM_DEBUG(
@@ -8388,8 +8427,8 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
       VPlanTransforms::optimize(*Plan, *PSE.getSE());
       // TODO: try to put it close to addActiveLaneMask().
       // Discard the plan if it is not EVL-compatible
-      if (CM.foldTailWithEVL() &&
-          !VPlanTransforms::tryAddExplicitVectorLength(*Plan))
+      if (CM.foldTailWithEVL() && !VPlanTransforms::tryAddExplicitVectorLength(
+                                      *Plan, CM.getMaxEVLSafeElements()))
         break;
       assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
       VPlans.push_back(std::move(Plan));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2d6d67a55c17d..de24688593ebe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -471,6 +471,11 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
       assert(State.VF.isScalable() && "Expected scalable vector factor.");
       Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
 
+      if (getNumOperands() == 3) {
+        Value *MaxSafeVF = State.get(getOperand(2), VPIteration(0, 0));
+        AVL = State.Builder.CreateBinaryIntrinsic(Intrinsic::umin, AVL,
+                                                  MaxSafeVF);
+      }
       Value *EVL = State.Builder.CreateIntrinsic(
           State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
           {AVL, VFArg, State.Builder.getTrue()});
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c91fd0f118e31..e703bb893d938 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1427,7 +1427,8 @@ void VPlanTransforms::addActiveLaneMask(
 /// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
 /// ...
 ///
-bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
+bool VPlanTransforms::tryAddExplicitVectorLength(
+    VPlan &Plan, const std::optional<unsigned> &MaxEVLSafeElements) {
   VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   // The transform updates all users of inductions to work based on EVL, instead
   // of the VF directly. At the moment, widened inductions cannot be updated, so
@@ -1452,8 +1453,12 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
   // Create the ExplicitVectorLengthPhi recipe in the main loop.
   auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
   EVLPhi->insertAfter(CanonicalIVPHI);
-  auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
-                                  {EVLPhi, Plan.getTripCount()});
+  SmallVector<VPValue *, 3> Operands = {EVLPhi, Plan.getTripCount()};
+  if (MaxEVLSafeElements)
+    Operands.push_back(Plan.getOrAddLiveIn(ConstantInt::get(
+        CanonicalIVPHI->getScalarType(), *MaxEVLSafeElements)));
+  auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, Operands,
+                                  DebugLoc());
   VPEVL->insertBefore(*Header, Header->getFirstNonPhi());
 
   auto *CanonicalIVIncrement =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 96b8a6639723c..8158c832f1a95 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -105,7 +105,9 @@ struct VPlanTransforms {
   /// VPCanonicalIVPHIRecipe is only used to control the loop after
   /// this transformation.
   /// \returns true if the transformation succeeds, or false if it doesn't.
-  static bool tryAddExplicitVectorLength(VPlan &Plan);
+  static bool
+  tryAddExplicitVectorLength(VPlan &Plan,
+                             const std::optional<unsigned> &MaxEVLSafeElements);
 };
 
 } // namespace llvm
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
index 81d8b01fe7fb7..c5ba25a5c0ace 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -140,11 +140,11 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
 ; CHECK-NEXT:            %ld.i64 = load i64, ptr %gep.iv, align 8 ->
 ; CHECK-NEXT:            store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
 ; CHECK-EMPTY:
-; CHECK-NEXT:        BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT:        BackwardVectorizable:
 ; CHECK-NEXT:            %ld.f64 = load double, ptr %gep.iv, align 8 ->
 ; CHECK-NEXT:            store double %val, ptr %gep.iv.101.i64, align 8
 ; CHECK-EMPTY:
-; CHECK-NEXT:        ForwardButPreventsForwarding:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            store double %val, ptr %gep.iv.101.i64, align 8 ->
 ; CHECK-NEXT:            %ld.i64 = load i64, ptr %gep.iv, align 8
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
index 7fc9958dba552..6e4bcec013a73 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
@@ -24,14 +24,13 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define void @f(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
 ; CHECK-LABEL: 'f'
 ; CHECK-NEXT:    for.body:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Forward loop carried data dependence that prevents store-to-load forwarding.
+; CHECK-NEXT:      Memory dependences are safe
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            store i32 %b_p1, ptr %Aidx, align 4 ->
 ; CHECK-NEXT:            %a = load i32, ptr %Aidx, align 4
 ; CHECK-EMPTY:
-; CHECK-NEXT:        ForwardButPreventsForwarding:
+; CHECK-NEXT:        Forward:
 ; CHECK-NEXT:            store i32 %b_p2, ptr %Aidx_next, align 4 ->
 ; CHECK-NEXT:            %a = load i32, ptr %Aidx, align 4
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll
...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/100755


More information about the llvm-commits mailing list