[llvm-branch-commits] [llvm] PR for llvm/llvm-project#79137 (PR #79561)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Jan 27 23:26:02 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (github-actions[bot])
<details>
<summary>Changes</summary>
resolves llvm/llvm-project#<!-- -->79137
---
Full diff: https://github.com/llvm/llvm-project/pull/79561.diff
3 Files Affected:
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+46-7)
- (modified) llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll (+10-1)
- (added) llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll (+78)
``````````diff
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7e67c90152829dc..dd6b88fee415a7a 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -657,16 +657,18 @@ class AccessAnalysis {
AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
- PredicatedScalarEvolution &PSE)
- : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
+ PredicatedScalarEvolution &PSE,
+ SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
+ : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
+ LoopAliasScopes(LoopAliasScopes) {
// We're analyzing dependences across loop iterations.
BAA.enableCrossIterationMode();
}
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+ Value *Ptr = const_cast<Value *>(Loc.Ptr);
+ AST.add(adjustLoc(Loc));
Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -674,8 +676,8 @@ class AccessAnalysis {
/// Register a store.
void addStore(MemoryLocation &Loc, Type *AccessTy) {
- Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer()));
+ Value *Ptr = const_cast<Value *>(Loc.Ptr);
+ AST.add(adjustLoc(Loc));
Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
}
@@ -731,6 +733,32 @@ class AccessAnalysis {
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
+ /// Adjust the MemoryLocation so that it represents accesses to this
+ /// location across all iterations, rather than a single one.
+ MemoryLocation adjustLoc(MemoryLocation Loc) const {
+ // The accessed location varies within the loop, but remains within the
+ // underlying object.
+ Loc.Size = LocationSize::beforeOrAfterPointer();
+ Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
+ Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
+ return Loc;
+ }
+
+ /// Drop alias scopes that are only valid within a single loop iteration.
+ MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
+ if (!ScopeList)
+ return nullptr;
+
+ // For the sake of simplicity, drop the whole scope list if any scope is
+ // iteration-local.
+ if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
+ return LoopAliasScopes.contains(cast<MDNode>(Scope));
+ }))
+ return nullptr;
+
+ return ScopeList;
+ }
+
/// Go over all memory access and check whether runtime pointer checks
/// are needed and build sets of dependency check candidates.
void processMemAccesses();
@@ -775,6 +803,10 @@ class AccessAnalysis {
PredicatedScalarEvolution &PSE;
DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
+
+ /// Alias scopes that are declared inside the loop, and as such not valid
+ /// across iterations.
+ SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
};
} // end anonymous namespace
@@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
+ SmallPtrSet<MDNode *, 8> LoopAliasScopes;
// Holds all the different accesses in the loop.
unsigned NumReads = 0;
@@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (HasComplexMemInst)
continue;
+ // Record alias scopes defined inside the loop.
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ for (Metadata *Op : Decl->getScopeList()->operands())
+ LoopAliasScopes.insert(cast<MDNode>(Op));
+
// Many math library functions read the rounding mode. We will only
// vectorize a loop if it contains known function calls that don't set
// the flag. Therefore, it is safe to ignore this read from memory.
@@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
}
MemoryDepChecker::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
+ AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
+ LoopAliasScopes);
// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
index 98bb5f99a40a1e2..fb296f5089422db 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll
@@ -7,8 +7,17 @@
define void @test_scope_in_loop(ptr %arg, i64 %num) {
; CHECK-LABEL: 'test_scope_in_loop'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3 ->
+; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
+; CHECK-EMPTY:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3 ->
+; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3
+; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
new file mode 100644
index 000000000000000..5c85c0d21f59f74
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -O3 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define internal void @acc(ptr noalias noundef %val, ptr noalias noundef %prev) {
+entry:
+ %0 = load i8, ptr %prev, align 1
+ %conv = zext i8 %0 to i32
+ %1 = load i8, ptr %val, align 1
+ %conv1 = zext i8 %1 to i32
+ %add = add nsw i32 %conv1, %conv
+ %conv2 = trunc i32 %add to i8
+ store i8 %conv2, ptr %val, align 1
+ ret void
+}
+
+; This loop should not get vectorized.
+define void @accsum(ptr noundef %vals, i64 noundef %num) #0 {
+; CHECK-LABEL: define void @accsum(
+; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1
+; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load i8, ptr [[VALS]], align 1
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT: [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]]
+; CHECK-NEXT: store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]]
+; CHECK-NEXT: [[INC]] = add nuw i64 [[I_02]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i64 [ 1, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp ult i64 %i.0, %num
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond
+ br label %for.end
+
+for.body: ; preds = %for.cond
+ %arrayidx = getelementptr inbounds i8, ptr %vals, i64 %i.0
+ %sub = sub i64 %i.0, 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %vals, i64 %sub
+ call void @acc(ptr noundef %arrayidx, ptr noundef %arrayidx1)
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %inc = add i64 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond.cleanup
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"}
+;.
+; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"acc: %val"}
+; CHECK: [[META2]] = distinct !{[[META2]], !"acc"}
+; CHECK: [[META3]] = !{[[META4:![0-9]+]]}
+; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/79561
More information about the llvm-branch-commits
mailing list