[llvm-branch-commits] [llvm] [LV, LAA] Don't vectorize loops with load and store to invar address. (PR #91092)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat May 4 14:46:45 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-llvm-transforms
Author: AtariDreams (AtariDreams)
<details>
<summary>Changes</summary>
Code checking stores to invariant addresses and reductions made an incorrect assumption that the case of both a load & store to the same invariant address does not need to be handled.
In some cases when vectorizing with runtime checks, there may be dependences with a load and store to the same address, storing a reduction value.
Update LAA to separately track if there was a store-store and a load-store dependence with an invariant addresses.
Bail out early if there as a load-store dependence with invariant address. If there was a store-store one, still apply the logic checking if they all store a reduction.
(cherry picked from commit b54a78d69be1952884462cb897abb9cf60a33978)
---
Full diff: https://github.com/llvm/llvm-project/pull/91092.diff
4 Files Affected:
- (modified) llvm/include/llvm/Analysis/LoopAccessAnalysis.h (+21-7)
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+9-5)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+12-4)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll (+34)
``````````diff
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index e39c371b41ec5c..1d67a71f43edde 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -579,7 +579,11 @@ class LoopAccessInfo {
AAResults *AA, DominatorTree *DT, LoopInfo *LI);
/// Return true we can analyze the memory accesses in the loop and there are
- /// no memory dependence cycles.
+ /// no memory dependence cycles. Note that for dependences between loads &
+ /// stores with uniform addresses,
+ /// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
+ /// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
+ /// checked.
bool canVectorizeMemory() const { return CanVecMem; }
/// Return true if there is a convergent operation in the loop. There may
@@ -632,10 +636,16 @@ class LoopAccessInfo {
/// Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
- /// If the loop has memory dependence involving an invariant address, i.e. two
- /// stores or a store and a load, then return true, else return false.
- bool hasDependenceInvolvingLoopInvariantAddress() const {
- return HasDependenceInvolvingLoopInvariantAddress;
+ /// Return true if the loop has memory dependence involving two stores to an
+ /// invariant address, else return false.
+ bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const {
+ return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
+ }
+
+ /// Return true if the loop has memory dependence involving a load and a store
+ /// to an invariant address, else return false.
+ bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const {
+ return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
}
/// Return the list of stores to invariant addresses.
@@ -697,8 +707,12 @@ class LoopAccessInfo {
bool CanVecMem = false;
bool HasConvergentOp = false;
- /// Indicator that there are non vectorizable stores to a uniform address.
- bool HasDependenceInvolvingLoopInvariantAddress = false;
+ /// Indicator that there are two non vectorizable stores to the same uniform
+ /// address.
+ bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
+ /// Indicator that there is non vectorizable load and store to the same
+ /// uniform address.
+ bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;
/// List of stores to invariant addresses.
SmallVector<StoreInst *> StoresToInvariantAddresses;
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index dd6b88fee415a7..fc9e82015e44f2 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2465,7 +2465,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (isInvariant(Ptr)) {
// Record store instructions to loop invariant addresses
StoresToInvariantAddresses.push_back(ST);
- HasDependenceInvolvingLoopInvariantAddress |=
+ HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
!UniformStores.insert(Ptr).second;
}
@@ -2521,7 +2521,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (UniformStores.count(Ptr)) {
LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n");
- HasDependenceInvolvingLoopInvariantAddress = true;
+ HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
}
MemoryLocation Loc = MemoryLocation::get(LD);
@@ -2985,9 +2985,13 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PtrRtChecking->print(OS, Depth);
OS << "\n";
- OS.indent(Depth) << "Non vectorizable stores to invariant address were "
- << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
- << "found in loop.\n";
+ OS.indent(Depth)
+ << "Non vectorizable stores to invariant address were "
+ << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
+ HasLoadStoreDependenceInvolvingLoopInvariantAddress
+ ? ""
+ : "not ")
+ << "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
PSE->getPredicate().print(OS, Depth);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 37a356c43e29a4..cfd0c1b5e592dc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1067,6 +1067,15 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;
+ if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
+ reportVectorizationFailure("We don't allow storing to uniform addresses",
+ "write to a loop invariant address could not "
+ "be vectorized",
+ "CantVectorizeStoreToLoopInvariantAddress", ORE,
+ TheLoop);
+ return false;
+ }
+
// We can vectorize stores to invariant address when final reduction value is
// guaranteed to be stored at the end of the loop. Also, if decision to
// vectorize loop is made, runtime checks are added so as to make sure that
@@ -1102,13 +1111,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
- if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
+ if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
// For each invariant address, check its last stored value is the result
// of one of our reductions.
//
- // We do not check if dependence with loads exists because they are
- // currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
- // behaviour changes we have to modify this code.
+ // We do not check if dependence with loads exists because that is already
+ // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
ScalarEvolution *SE = PSE.getSE();
SmallVector<StoreInst *, 4> UnhandledStores;
for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 5584aa969367ac..d5074f6a3b0827 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -118,6 +118,40 @@ exit:
ret void
}
+; Check that if we have a read from an invariant address, we do not vectorize,
+; even if we vectorize with runtime checks. The test below is a variant of
+; @reduc_store_load with a non-constant dependence distance, resulting in
+; vectorization with runtime checks.
+;
+; CHECK-LABEL: @reduc_store_load_with_non_constant_distance_dependence
+; CHECK-NOT: vector.body:
+define void @reduc_store_load_with_non_constant_distance_dependence(ptr %dst, ptr noalias %dst.2, i64 %off) {
+entry:
+ %gep.dst = getelementptr inbounds i32, ptr %dst, i64 42
+ %dst.2.off = getelementptr inbounds i32, ptr %dst.2, i64 %off
+ store i32 0, ptr %gep.dst, align 4
+ br label %for.body
+
+for.body:
+ %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %gep.src = getelementptr inbounds i32, ptr %dst.2, i64 %iv
+ %0 = load i32, ptr %gep.src, align 4
+ %iv.off = mul i64 %iv, 2
+ %add = add nsw i32 %sum, %0
+ %lv = load i32, ptr %gep.dst
+ store i32 %add, ptr %gep.dst, align 4
+ %gep.src.2 = getelementptr inbounds i32, ptr %dst.2.off, i64 %iv
+ store i32 %lv, ptr %gep.src.2, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+
; Final value is not guaranteed to be stored in an invariant address.
; We don't vectorize in that case.
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/91092
More information about the llvm-branch-commits
mailing list