[llvm] [LV][LAA]Add initial support for non-power-of-2 store-load forwarding distance (PR #137873)

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 29 13:44:40 PDT 2025


https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/137873

This patch adds initial support for non-power-of-2 store-load forwarding
distances on targets that can (potentially!) support them.


>From 6142fdf0261e79bcd3e93b3ff63ecc5eafa9c593 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 29 Apr 2025 20:44:31 +0000
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5
---
 .../llvm/Analysis/LoopAccessAnalysis.h        |  43 +++-
 .../Vectorize/LoopVectorizationLegality.h     |   2 +-
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |  57 ++++-
 .../safe-with-dep-distance-non-power-of-2.ll  | 204 ++++++++++++------
 4 files changed, 218 insertions(+), 88 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index f715e0ec8dbb4..02647adea95a8 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -180,9 +180,10 @@ class MemoryDepChecker {
 
   MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
                    const DenseMap<Value *, const SCEV *> &SymbolicStrides,
-                   unsigned MaxTargetVectorWidthInBits)
+                   unsigned MaxTargetVectorWidthInBits, bool AllowNonPow2Deps)
       : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
-        MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
+        MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits),
+        AllowNonPow2Deps(AllowNonPow2Deps) {}
 
   /// Register the location (instructions are given increasing numbers)
   /// of a write access.
@@ -218,17 +219,29 @@ class MemoryDepChecker {
 
   /// Return true if there are no store-load forwarding dependencies.
   bool isSafeForAnyStoreLoadForwardDistances() const {
-    return MaxStoreLoadForwardSafeDistanceInBits ==
-           std::numeric_limits<uint64_t>::max();
+    return MaxPowerOf2StoreLoadForwardSafeDistanceInBits ==
+               std::numeric_limits<uint64_t>::max() &&
+           MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits ==
+               std::numeric_limits<uint64_t>::max();
   }
 
-  /// Return safe power-of-2 number of elements, which do not prevent store-load
-  /// forwarding, multiplied by the size of the elements in bits.
-  uint64_t getStoreLoadForwardSafeDistanceInBits() const {
+  /// Return safe number of elements, which do not prevent store-load
+  /// forwarding, multiplied by the size of the elements in bits (power-of-2).
+  uint64_t getPowerOf2StoreLoadForwardSafeDistanceInBits() const {
     assert(!isSafeForAnyStoreLoadForwardDistances() &&
            "Expected the distance, that prevent store-load forwarding, to be "
            "set.");
-    return MaxStoreLoadForwardSafeDistanceInBits;
+    return MaxPowerOf2StoreLoadForwardSafeDistanceInBits;
+  }
+
+  /// Return safe number of elements, which do not prevent store-load
+  /// forwarding, multiplied by the size of the elements in bits
+  /// (non-power-of-2).
+  uint64_t getNonPowerOf2StoreLoadForwardSafeDistanceInBits() const {
+    assert(!isSafeForAnyStoreLoadForwardDistances() &&
+           "Expected the distance, that prevent store-load forwarding, to be "
+           "set.");
+    return MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits;
   }
 
   /// In same cases when the dependency check fails we can still
@@ -319,9 +332,14 @@ class MemoryDepChecker {
   /// restrictive.
   uint64_t MaxSafeVectorWidthInBits = -1U;
 
-  /// Maximum power-of-2 number of elements, which do not prevent store-load
-  /// forwarding, multiplied by the size of the elements in bits.
-  uint64_t MaxStoreLoadForwardSafeDistanceInBits =
+  /// Maximum number of elements, which do not prevent store-load forwarding,
+  /// multiplied by the size of the elements in bits (power-of-2).
+  uint64_t MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
+      std::numeric_limits<uint64_t>::max();
+
+  /// Maximum number of elements, which do not prevent store-load forwarding,
+  /// multiplied by the size of the elements in bits (non-power-of-2).
+  uint64_t MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
       std::numeric_limits<uint64_t>::max();
 
   /// If we see a non-constant dependence distance we can still try to
@@ -348,6 +366,9 @@ class MemoryDepChecker {
   /// backwards-vectorizable or unknown (triggering a runtime check).
   unsigned MaxTargetVectorWidthInBits = 0;
 
+  /// True if current target supports non-power-of-2 dependence distances.
+  bool AllowNonPow2Deps = false;
+
   /// Mapping of SCEV expressions to their expanded pointer bounds (pair of
   /// start and end pointer expressions).
   DenseMap<std::pair<const SCEV *, Type *>,
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..65d9938c8a0cd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -415,7 +415,7 @@ class LoopVectorizationLegality {
   /// Return safe power-of-2 number of elements, which do not prevent store-load
   /// forwarding and safe to operate simultaneously.
   uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
-    return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
+    return LAI->getDepChecker().getPowerOf2StoreLoadForwardSafeDistanceInBits();
   }
 
   /// Returns true if vector representation of the instruction \p I
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index c65bb8be8b996..30fd50bd15303 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1757,7 +1757,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
   // Maximum vector factor.
   uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
       std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
-               MaxStoreLoadForwardSafeDistanceInBits);
+               MaxPowerOf2StoreLoadForwardSafeDistanceInBits);
+  uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0;
 
   // Compute the smallest VF at which the store and load would be misaligned.
   for (uint64_t VF = 2 * TypeByteSize;
@@ -1769,24 +1770,61 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
       break;
     }
   }
+  // RISCV VLA supports non-power-of-2 vector factors. So, we iterate in
+  // backward order to find the largest VF at which either the stores and loads
+  // stay aligned or the number of iterations between conflicting memory
+  // addresses is not less than 8 (NumItersForStoreLoadThroughMemory).
+  if (AllowNonPow2Deps) {
+    MaxVFWithoutSLForwardIssuesNonPowerOf2 =
+        std::min(8 * VectorizerParams::MaxVectorWidth / TypeByteSize,
+                 MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits);
+
+    for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2;
+         VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) {
+      if (Distance % VF == 0 ||
+          Distance / VF >= NumItersForStoreLoadThroughMemory) {
+        uint64_t GCD =
+            isSafeForAnyStoreLoadForwardDistances()
+                ? VF
+                : std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits,
+                           VF);
+        MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD;
+        break;
+      }
+    }
+  }
 
-  if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
+  if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize &&
+      MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) {
     LLVM_DEBUG(
         dbgs() << "LAA: Distance " << Distance
                << " that could cause a store-load forwarding conflict\n");
     return true;
   }
 
+  // Handle the non-power-of-2 store-load forwarding distance here; the
+  // power-of-2 distance is handled separately below.
+  if (AllowNonPow2Deps && CommonStride &&
+      MaxVFWithoutSLForwardIssuesNonPowerOf2 <
+          MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits &&
+      MaxVFWithoutSLForwardIssuesNonPowerOf2 !=
+          8 * VectorizerParams::MaxVectorWidth / TypeByteSize) {
+    uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride;
+    uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+    MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
+        std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
+  }
+
   if (CommonStride &&
       MaxVFWithoutSLForwardIssuesPowerOf2 <
-          MaxStoreLoadForwardSafeDistanceInBits &&
+          MaxPowerOf2StoreLoadForwardSafeDistanceInBits &&
       MaxVFWithoutSLForwardIssuesPowerOf2 !=
           VectorizerParams::MaxVectorWidth * TypeByteSize) {
     uint64_t MaxVF =
         bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
     uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
-    MaxStoreLoadForwardSafeDistanceInBits =
-        std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
+    MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
+        std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
   }
   return false;
 }
@@ -2985,8 +3023,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
     MaxTargetVectorWidthInBits =
         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
 
-  DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
-                                                  MaxTargetVectorWidthInBits);
+  DepChecker = std::make_unique<MemoryDepChecker>(
+      *PSE, L, SymbolicStrides, MaxTargetVectorWidthInBits,
+      TTI && TTI->hasActiveVectorLength(0, nullptr, Align()));
   PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
   if (canAnalyzeLoop())
     CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3000,7 +3039,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
       OS << " with a maximum safe vector width of "
          << DC.getMaxSafeVectorWidthInBits() << " bits";
     if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
-      uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
+      uint64_t SLDist = DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits();
+      if (SLDist == std::numeric_limits<uint64_t>::max())
+        SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits();
       OS << ", with a maximum safe store-load forward width of " << SLDist
          << " bits";
     }
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll
index 79dcfd2c4c08d..15fb79807b965 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes='print<access-info>' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s
-; RUN: opt -passes='print<access-info>' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s
+; RUN: opt -passes='print<access-info>' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,RISCV64
+; RUN: opt -passes='print<access-info>' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,X86_64
 
 ; REQUIRES: riscv-registered-target, x86-registered-target
 
@@ -41,21 +41,37 @@ exit:
 ; Dependence distance is less than trip count, thus we must prove that
 ; chosen VF guaranteed to be less than dependence distance.
 define void @test_may_clobber1(ptr %p) {
-; CHECK-LABEL: 'test_may_clobber1'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        BackwardVectorizable:
-; CHECK-NEXT:            %v = load i64, ptr %a1, align 32 ->
-; CHECK-NEXT:            store i64 %v, ptr %a2, align 32
-; CHECK-EMPTY:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; RISCV64-LABEL: 'test_may_clobber1'
+; RISCV64-NEXT:    loop:
+; RISCV64-NEXT:      Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 320 bits
+; RISCV64-NEXT:      Dependences:
+; RISCV64-NEXT:        BackwardVectorizable:
+; RISCV64-NEXT:            %v = load i64, ptr %a1, align 32 ->
+; RISCV64-NEXT:            store i64 %v, ptr %a2, align 32
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Run-time memory checks:
+; RISCV64-NEXT:      Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT:      SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Expressions re-written:
+;
+; X86_64-LABEL: 'test_may_clobber1'
+; X86_64-NEXT:    loop:
+; X86_64-NEXT:      Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits
+; X86_64-NEXT:      Dependences:
+; X86_64-NEXT:        BackwardVectorizable:
+; X86_64-NEXT:            %v = load i64, ptr %a1, align 32 ->
+; X86_64-NEXT:            store i64 %v, ptr %a2, align 32
+; X86_64-EMPTY:
+; X86_64-NEXT:      Run-time memory checks:
+; X86_64-NEXT:      Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT:      SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Expressions re-written:
 ;
 entry:
   br label %loop
@@ -76,22 +92,38 @@ exit:
 }
 
 define void @test_may_clobber2(ptr %p) {
-; CHECK-LABEL: 'test_may_clobber2'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Backward loop carried data dependence that prevents store-to-load forwarding.
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        BackwardVectorizableButPreventsForwarding:
-; CHECK-NEXT:            %v = load i64, ptr %a1, align 32 ->
-; CHECK-NEXT:            store i64 %v, ptr %a2, align 32
-; CHECK-EMPTY:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; RISCV64-LABEL: 'test_may_clobber2'
+; RISCV64-NEXT:    loop:
+; RISCV64-NEXT:      Memory dependences are safe with a maximum safe vector width of 576 bits, with a maximum safe store-load forward width of 192 bits
+; RISCV64-NEXT:      Dependences:
+; RISCV64-NEXT:        BackwardVectorizable:
+; RISCV64-NEXT:            %v = load i64, ptr %a1, align 32 ->
+; RISCV64-NEXT:            store i64 %v, ptr %a2, align 32
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Run-time memory checks:
+; RISCV64-NEXT:      Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT:      SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Expressions re-written:
+;
+; X86_64-LABEL: 'test_may_clobber2'
+; X86_64-NEXT:    loop:
+; X86_64-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; X86_64-NEXT:  Backward loop carried data dependence that prevents store-to-load forwarding.
+; X86_64-NEXT:      Dependences:
+; X86_64-NEXT:        BackwardVectorizableButPreventsForwarding:
+; X86_64-NEXT:            %v = load i64, ptr %a1, align 32 ->
+; X86_64-NEXT:            store i64 %v, ptr %a2, align 32
+; X86_64-EMPTY:
+; X86_64-NEXT:      Run-time memory checks:
+; X86_64-NEXT:      Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT:      SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Expressions re-written:
 ;
 entry:
   br label %loop
@@ -112,21 +144,37 @@ exit:
 }
 
 define void @test_may_clobber3(ptr %p) {
-; CHECK-LABEL: 'test_may_clobber3'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        BackwardVectorizable:
-; CHECK-NEXT:            %v = load i64, ptr %a1, align 32 ->
-; CHECK-NEXT:            store i64 %v, ptr %a2, align 32
-; CHECK-EMPTY:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; RISCV64-LABEL: 'test_may_clobber3'
+; RISCV64-NEXT:    loop:
+; RISCV64-NEXT:      Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 320 bits
+; RISCV64-NEXT:      Dependences:
+; RISCV64-NEXT:        BackwardVectorizable:
+; RISCV64-NEXT:            %v = load i64, ptr %a1, align 32 ->
+; RISCV64-NEXT:            store i64 %v, ptr %a2, align 32
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Run-time memory checks:
+; RISCV64-NEXT:      Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT:      SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Expressions re-written:
+;
+; X86_64-LABEL: 'test_may_clobber3'
+; X86_64-NEXT:    loop:
+; X86_64-NEXT:      Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits
+; X86_64-NEXT:      Dependences:
+; X86_64-NEXT:        BackwardVectorizable:
+; X86_64-NEXT:            %v = load i64, ptr %a1, align 32 ->
+; X86_64-NEXT:            store i64 %v, ptr %a2, align 32
+; X86_64-EMPTY:
+; X86_64-NEXT:      Run-time memory checks:
+; X86_64-NEXT:      Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT:      SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Expressions re-written:
 ;
 entry:
   br label %loop
@@ -215,26 +263,46 @@ exit:
 }
 
 define void @non_power_2_storeloadforward(ptr %A) {
-; CHECK-LABEL: 'non_power_2_storeloadforward'
-; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:  Backward loop carried data dependence that prevents store-to-load forwarding.
-; CHECK-NEXT:      Dependences:
-; CHECK-NEXT:        Forward:
-; CHECK-NEXT:            %3 = load i32, ptr %gep.iv.4, align 4 ->
-; CHECK-NEXT:            store i32 %add3, ptr %gep.iv, align 4
-; CHECK-EMPTY:
-; CHECK-NEXT:        BackwardVectorizableButPreventsForwarding:
-; CHECK-NEXT:            %1 = load i32, ptr %gep.iv.sub.3, align 4 ->
-; CHECK-NEXT:            store i32 %add3, ptr %gep.iv, align 4
-; CHECK-EMPTY:
-; CHECK-NEXT:      Run-time memory checks:
-; CHECK-NEXT:      Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:      SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT:      Expressions re-written:
+; RISCV64-LABEL: 'non_power_2_storeloadforward'
+; RISCV64-NEXT:    loop:
+; RISCV64-NEXT:      Memory dependences are safe with a maximum safe vector width of 96 bits, with a maximum safe store-load forward width of 96 bits
+; RISCV64-NEXT:      Dependences:
+; RISCV64-NEXT:        Forward:
+; RISCV64-NEXT:            %3 = load i32, ptr %gep.iv.4, align 4 ->
+; RISCV64-NEXT:            store i32 %add3, ptr %gep.iv, align 4
+; RISCV64-EMPTY:
+; RISCV64-NEXT:        BackwardVectorizable:
+; RISCV64-NEXT:            %1 = load i32, ptr %gep.iv.sub.3, align 4 ->
+; RISCV64-NEXT:            store i32 %add3, ptr %gep.iv, align 4
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Run-time memory checks:
+; RISCV64-NEXT:      Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT:      SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT:      Expressions re-written:
+;
+; X86_64-LABEL: 'non_power_2_storeloadforward'
+; X86_64-NEXT:    loop:
+; X86_64-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; X86_64-NEXT:  Backward loop carried data dependence that prevents store-to-load forwarding.
+; X86_64-NEXT:      Dependences:
+; X86_64-NEXT:        Forward:
+; X86_64-NEXT:            %3 = load i32, ptr %gep.iv.4, align 4 ->
+; X86_64-NEXT:            store i32 %add3, ptr %gep.iv, align 4
+; X86_64-EMPTY:
+; X86_64-NEXT:        BackwardVectorizableButPreventsForwarding:
+; X86_64-NEXT:            %1 = load i32, ptr %gep.iv.sub.3, align 4 ->
+; X86_64-NEXT:            store i32 %add3, ptr %gep.iv, align 4
+; X86_64-EMPTY:
+; X86_64-NEXT:      Run-time memory checks:
+; X86_64-NEXT:      Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT:      SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT:      Expressions re-written:
 ;
 entry:
   br label %loop



More information about the llvm-commits mailing list