[llvm] [LV][LAA]Add initial support for non-power-of-2 store-load forwarding distance (PR #137873)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 13:44:40 PDT 2025
https://github.com/alexey-bataev created https://github.com/llvm/llvm-project/pull/137873
This patch adds initial support for non-power-of-2 store-load forwarding
distances on targets that can (potentially) support them.
From 6142fdf0261e79bcd3e93b3ff63ecc5eafa9c593 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Tue, 29 Apr 2025 20:44:31 +0000
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../llvm/Analysis/LoopAccessAnalysis.h | 43 +++-
.../Vectorize/LoopVectorizationLegality.h | 2 +-
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 57 ++++-
.../safe-with-dep-distance-non-power-of-2.ll | 204 ++++++++++++------
4 files changed, 218 insertions(+), 88 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index f715e0ec8dbb4..02647adea95a8 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -180,9 +180,10 @@ class MemoryDepChecker {
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
const DenseMap<Value *, const SCEV *> &SymbolicStrides,
- unsigned MaxTargetVectorWidthInBits)
+ unsigned MaxTargetVectorWidthInBits, bool AllowNonPow2Deps)
: PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
- MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
+ MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits),
+ AllowNonPow2Deps(AllowNonPow2Deps) {}
/// Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -218,17 +219,29 @@ class MemoryDepChecker {
/// Return true if there are no store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
- return MaxStoreLoadForwardSafeDistanceInBits ==
- std::numeric_limits<uint64_t>::max();
+ return MaxPowerOf2StoreLoadForwardSafeDistanceInBits ==
+ std::numeric_limits<uint64_t>::max() &&
+ MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits ==
+ std::numeric_limits<uint64_t>::max();
}
- /// Return safe power-of-2 number of elements, which do not prevent store-load
- /// forwarding, multiplied by the size of the elements in bits.
- uint64_t getStoreLoadForwardSafeDistanceInBits() const {
+ /// Return safe number of elements, which do not prevent store-load
+ /// forwarding, multiplied by the size of the elements in bits (power-of-2).
+ uint64_t getPowerOf2StoreLoadForwardSafeDistanceInBits() const {
assert(!isSafeForAnyStoreLoadForwardDistances() &&
"Expected the distance, that prevent store-load forwarding, to be "
"set.");
- return MaxStoreLoadForwardSafeDistanceInBits;
+ return MaxPowerOf2StoreLoadForwardSafeDistanceInBits;
+ }
+
+ /// Return the safe, possibly non-power-of-2, number of elements that do not
+ /// prevent store-load forwarding, multiplied by the size of the elements in
+ /// bits.
+ uint64_t getNonPowerOf2StoreLoadForwardSafeDistanceInBits() const {
+ assert(!isSafeForAnyStoreLoadForwardDistances() &&
+ "Expected the distance, that prevent store-load forwarding, to be "
+ "set.");
+ return MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits;
}
/// In same cases when the dependency check fails we can still
@@ -319,9 +332,14 @@ class MemoryDepChecker {
/// restrictive.
uint64_t MaxSafeVectorWidthInBits = -1U;
- /// Maximum power-of-2 number of elements, which do not prevent store-load
- /// forwarding, multiplied by the size of the elements in bits.
- uint64_t MaxStoreLoadForwardSafeDistanceInBits =
+ /// Maximum number of elements, which do not prevent store-load forwarding,
+ /// multiplied by the size of the elements in bits (power-of-2).
+ uint64_t MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
+ std::numeric_limits<uint64_t>::max();
+
+ /// Maximum number of elements, which do not prevent store-load forwarding,
+ /// multiplied by the size of the elements in bits (non-power-of-2).
+ uint64_t MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
std::numeric_limits<uint64_t>::max();
/// If we see a non-constant dependence distance we can still try to
@@ -348,6 +366,9 @@ class MemoryDepChecker {
/// backwards-vectorizable or unknown (triggering a runtime check).
unsigned MaxTargetVectorWidthInBits = 0;
+ /// True if the current target supports non-power-of-2 dependence distances.
+ bool AllowNonPow2Deps = false;
+
/// Mapping of SCEV expressions to their expanded pointer bounds (pair of
/// start and end pointer expressions).
DenseMap<std::pair<const SCEV *, Type *>,
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..65d9938c8a0cd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -415,7 +415,7 @@ class LoopVectorizationLegality {
/// Return safe power-of-2 number of elements, which do not prevent store-load
/// forwarding and safe to operate simultaneously.
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const {
- return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
+ return LAI->getDepChecker().getPowerOf2StoreLoadForwardSafeDistanceInBits();
}
/// Returns true if vector representation of the instruction \p I
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index c65bb8be8b996..30fd50bd15303 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1757,7 +1757,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
// Maximum vector factor.
uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 =
std::min(VectorizerParams::MaxVectorWidth * TypeByteSize,
- MaxStoreLoadForwardSafeDistanceInBits);
+ MaxPowerOf2StoreLoadForwardSafeDistanceInBits);
+ uint64_t MaxVFWithoutSLForwardIssuesNonPowerOf2 = 0;
// Compute the smallest VF at which the store and load would be misaligned.
for (uint64_t VF = 2 * TypeByteSize;
@@ -1769,24 +1770,61 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
break;
}
}
+ // RISC-V VLA supports non-power-of-2 vector factors, so we iterate in
+ // backward order to find the largest VF for which the stores and loads are
+ // aligned, or for which the number of iterations between conflicting memory
+ // addresses is not less than 8 (NumItersForStoreLoadThroughMemory).
+ if (AllowNonPow2Deps) {
+ MaxVFWithoutSLForwardIssuesNonPowerOf2 =
+ std::min(8 * VectorizerParams::MaxVectorWidth / TypeByteSize,
+ MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits);
+
+ for (uint64_t VF = MaxVFWithoutSLForwardIssuesNonPowerOf2;
+ VF > MaxVFWithoutSLForwardIssuesPowerOf2; VF -= TypeByteSize) {
+ if (Distance % VF == 0 ||
+ Distance / VF >= NumItersForStoreLoadThroughMemory) {
+ uint64_t GCD =
+ isSafeForAnyStoreLoadForwardDistances()
+ ? VF
+ : std::gcd(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits,
+ VF);
+ MaxVFWithoutSLForwardIssuesNonPowerOf2 = GCD;
+ break;
+ }
+ }
+ }
- if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) {
+ if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize &&
+ MaxVFWithoutSLForwardIssuesNonPowerOf2 < 2 * TypeByteSize) {
LLVM_DEBUG(
dbgs() << "LAA: Distance " << Distance
<< " that could cause a store-load forwarding conflict\n");
return true;
}
+ // Update the non-power-of-2 store-load forwarding distance first; the
+ // power-of-2 distance is calculated separately below.
+ if (AllowNonPow2Deps && CommonStride &&
+ MaxVFWithoutSLForwardIssuesNonPowerOf2 <
+ MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits &&
+ MaxVFWithoutSLForwardIssuesNonPowerOf2 !=
+ 8 * VectorizerParams::MaxVectorWidth / TypeByteSize) {
+ uint64_t MaxVF = MaxVFWithoutSLForwardIssuesNonPowerOf2 / CommonStride;
+ uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
+ MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits =
+ std::min(MaxNonPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
+ }
+
if (CommonStride &&
MaxVFWithoutSLForwardIssuesPowerOf2 <
- MaxStoreLoadForwardSafeDistanceInBits &&
+ MaxPowerOf2StoreLoadForwardSafeDistanceInBits &&
MaxVFWithoutSLForwardIssuesPowerOf2 !=
VectorizerParams::MaxVectorWidth * TypeByteSize) {
uint64_t MaxVF =
bit_floor(MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride);
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
- MaxStoreLoadForwardSafeDistanceInBits =
- std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits);
+ MaxPowerOf2StoreLoadForwardSafeDistanceInBits =
+ std::min(MaxPowerOf2StoreLoadForwardSafeDistanceInBits, MaxVFInBits);
}
return false;
}
@@ -2985,8 +3023,9 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
MaxTargetVectorWidthInBits =
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2;
- DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
- MaxTargetVectorWidthInBits);
+ DepChecker = std::make_unique<MemoryDepChecker>(
+ *PSE, L, SymbolicStrides, MaxTargetVectorWidthInBits,
+ TTI && TTI->hasActiveVectorLength(0, nullptr, Align()));
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
if (canAnalyzeLoop())
CanVecMem = analyzeLoop(AA, LI, TLI, DT);
@@ -3000,7 +3039,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS << " with a maximum safe vector width of "
<< DC.getMaxSafeVectorWidthInBits() << " bits";
if (!DC.isSafeForAnyStoreLoadForwardDistances()) {
- uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits();
+ uint64_t SLDist = DC.getNonPowerOf2StoreLoadForwardSafeDistanceInBits();
+ if (SLDist == std::numeric_limits<uint64_t>::max())
+ SLDist = DC.getPowerOf2StoreLoadForwardSafeDistanceInBits();
OS << ", with a maximum safe store-load forward width of " << SLDist
<< " bits";
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll
index 79dcfd2c4c08d..15fb79807b965 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance-non-power-of-2.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes='print<access-info>' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s
-; RUN: opt -passes='print<access-info>' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s
+; RUN: opt -passes='print<access-info>' -disable-output -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,RISCV64
+; RUN: opt -passes='print<access-info>' -disable-output -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,X86_64
; REQUIRES: riscv-registered-target, x86-registered-target
@@ -41,21 +41,37 @@ exit:
; Dependence distance is less than trip count, thus we must prove that
; chosen VF guaranteed to be less than dependence distance.
define void @test_may_clobber1(ptr %p) {
-; CHECK-LABEL: 'test_may_clobber1'
-; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits
-; CHECK-NEXT: Dependences:
-; CHECK-NEXT: BackwardVectorizable:
-; CHECK-NEXT: %v = load i64, ptr %a1, align 32 ->
-; CHECK-NEXT: store i64 %v, ptr %a2, align 32
-; CHECK-EMPTY:
-; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT: SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT: Expressions re-written:
+; RISCV64-LABEL: 'test_may_clobber1'
+; RISCV64-NEXT: loop:
+; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 320 bits
+; RISCV64-NEXT: Dependences:
+; RISCV64-NEXT: BackwardVectorizable:
+; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 ->
+; RISCV64-NEXT: store i64 %v, ptr %a2, align 32
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Run-time memory checks:
+; RISCV64-NEXT: Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT: SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Expressions re-written:
+;
+; X86_64-LABEL: 'test_may_clobber1'
+; X86_64-NEXT: loop:
+; X86_64-NEXT: Memory dependences are safe with a maximum safe vector width of 6400 bits, with a maximum safe store-load forward width of 256 bits
+; X86_64-NEXT: Dependences:
+; X86_64-NEXT: BackwardVectorizable:
+; X86_64-NEXT: %v = load i64, ptr %a1, align 32 ->
+; X86_64-NEXT: store i64 %v, ptr %a2, align 32
+; X86_64-EMPTY:
+; X86_64-NEXT: Run-time memory checks:
+; X86_64-NEXT: Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT: SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT: Expressions re-written:
;
entry:
br label %loop
@@ -76,22 +92,38 @@ exit:
}
define void @test_may_clobber2(ptr %p) {
-; CHECK-LABEL: 'test_may_clobber2'
-; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
-; CHECK-NEXT: Dependences:
-; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
-; CHECK-NEXT: %v = load i64, ptr %a1, align 32 ->
-; CHECK-NEXT: store i64 %v, ptr %a2, align 32
-; CHECK-EMPTY:
-; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT: SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT: Expressions re-written:
+; RISCV64-LABEL: 'test_may_clobber2'
+; RISCV64-NEXT: loop:
+; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 576 bits, with a maximum safe store-load forward width of 192 bits
+; RISCV64-NEXT: Dependences:
+; RISCV64-NEXT: BackwardVectorizable:
+; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 ->
+; RISCV64-NEXT: store i64 %v, ptr %a2, align 32
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Run-time memory checks:
+; RISCV64-NEXT: Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT: SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Expressions re-written:
+;
+; X86_64-LABEL: 'test_may_clobber2'
+; X86_64-NEXT: loop:
+; X86_64-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; X86_64-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
+; X86_64-NEXT: Dependences:
+; X86_64-NEXT: BackwardVectorizableButPreventsForwarding:
+; X86_64-NEXT: %v = load i64, ptr %a1, align 32 ->
+; X86_64-NEXT: store i64 %v, ptr %a2, align 32
+; X86_64-EMPTY:
+; X86_64-NEXT: Run-time memory checks:
+; X86_64-NEXT: Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT: SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT: Expressions re-written:
;
entry:
br label %loop
@@ -112,21 +144,37 @@ exit:
}
define void @test_may_clobber3(ptr %p) {
-; CHECK-LABEL: 'test_may_clobber3'
-; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits
-; CHECK-NEXT: Dependences:
-; CHECK-NEXT: BackwardVectorizable:
-; CHECK-NEXT: %v = load i64, ptr %a1, align 32 ->
-; CHECK-NEXT: store i64 %v, ptr %a2, align 32
-; CHECK-EMPTY:
-; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT: SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT: Expressions re-written:
+; RISCV64-LABEL: 'test_may_clobber3'
+; RISCV64-NEXT: loop:
+; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 320 bits
+; RISCV64-NEXT: Dependences:
+; RISCV64-NEXT: BackwardVectorizable:
+; RISCV64-NEXT: %v = load i64, ptr %a1, align 32 ->
+; RISCV64-NEXT: store i64 %v, ptr %a2, align 32
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Run-time memory checks:
+; RISCV64-NEXT: Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT: SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Expressions re-written:
+;
+; X86_64-LABEL: 'test_may_clobber3'
+; X86_64-NEXT: loop:
+; X86_64-NEXT: Memory dependences are safe with a maximum safe vector width of 640 bits, with a maximum safe store-load forward width of 128 bits
+; X86_64-NEXT: Dependences:
+; X86_64-NEXT: BackwardVectorizable:
+; X86_64-NEXT: %v = load i64, ptr %a1, align 32 ->
+; X86_64-NEXT: store i64 %v, ptr %a2, align 32
+; X86_64-EMPTY:
+; X86_64-NEXT: Run-time memory checks:
+; X86_64-NEXT: Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT: SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT: Expressions re-written:
;
entry:
br label %loop
@@ -215,26 +263,46 @@ exit:
}
define void @non_power_2_storeloadforward(ptr %A) {
-; CHECK-LABEL: 'non_power_2_storeloadforward'
-; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
-; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Forward:
-; CHECK-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 ->
-; CHECK-NEXT: store i32 %add3, ptr %gep.iv, align 4
-; CHECK-EMPTY:
-; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
-; CHECK-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 ->
-; CHECK-NEXT: store i32 %add3, ptr %gep.iv, align 4
-; CHECK-EMPTY:
-; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT: SCEV assumptions:
-; CHECK-EMPTY:
-; CHECK-NEXT: Expressions re-written:
+; RISCV64-LABEL: 'non_power_2_storeloadforward'
+; RISCV64-NEXT: loop:
+; RISCV64-NEXT: Memory dependences are safe with a maximum safe vector width of 96 bits, with a maximum safe store-load forward width of 96 bits
+; RISCV64-NEXT: Dependences:
+; RISCV64-NEXT: Forward:
+; RISCV64-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 ->
+; RISCV64-NEXT: store i32 %add3, ptr %gep.iv, align 4
+; RISCV64-EMPTY:
+; RISCV64-NEXT: BackwardVectorizable:
+; RISCV64-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 ->
+; RISCV64-NEXT: store i32 %add3, ptr %gep.iv, align 4
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Run-time memory checks:
+; RISCV64-NEXT: Grouped accesses:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; RISCV64-NEXT: SCEV assumptions:
+; RISCV64-EMPTY:
+; RISCV64-NEXT: Expressions re-written:
+;
+; X86_64-LABEL: 'non_power_2_storeloadforward'
+; X86_64-NEXT: loop:
+; X86_64-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; X86_64-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
+; X86_64-NEXT: Dependences:
+; X86_64-NEXT: Forward:
+; X86_64-NEXT: %3 = load i32, ptr %gep.iv.4, align 4 ->
+; X86_64-NEXT: store i32 %add3, ptr %gep.iv, align 4
+; X86_64-EMPTY:
+; X86_64-NEXT: BackwardVectorizableButPreventsForwarding:
+; X86_64-NEXT: %1 = load i32, ptr %gep.iv.sub.3, align 4 ->
+; X86_64-NEXT: store i32 %add3, ptr %gep.iv, align 4
+; X86_64-EMPTY:
+; X86_64-NEXT: Run-time memory checks:
+; X86_64-NEXT: Grouped accesses:
+; X86_64-EMPTY:
+; X86_64-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; X86_64-NEXT: SCEV assumptions:
+; X86_64-EMPTY:
+; X86_64-NEXT: Expressions re-written:
;
entry:
br label %loop
More information about the llvm-commits
mailing list