[llvm] b0761bb - [DependenceAnalysis] Memory dependence analysis internal caching mechanism is broken in presence of TBAA (PR42733).
Evgeniy Brevnov via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 05:21:05 PST 2020
Author: Evgeniy Brevnov
Date: 2020-02-21T20:20:36+07:00
New Revision: b0761bbc7639d0901d623e1fbf53ccf6ce066b16
URL: https://github.com/llvm/llvm-project/commit/b0761bbc7639d0901d623e1fbf53ccf6ce066b16
DIFF: https://github.com/llvm/llvm-project/commit/b0761bbc7639d0901d623e1fbf53ccf6ce066b16.diff
LOG: [DependenceAnalysis] Memory dependence analysis internal caching mechanism is broken in presence of TBAA (PR42733).
Summary:
There is a flaw in the memory dependence analysis caching mechanism when memory accesses with TBAA are involved. Assume we first analyze and cache the results for an access with TBAA. Later we request the dependence for the same memory location but without TBAA (or with different TBAA). By design these two queries should share one entry in the internal cache, the one corresponding to a generic access (without TBAA). Thus, upon the second request, the internal cache is cleared and we continue the analysis for the access as if there were no TBAA.
The problem is that even though the internal cache is cleared, the set of visited nodes is not. That means we won't traverse the already-visited nodes again and repopulate the internal cache with the corresponding dependence results, so we end up with the internal cache in an incomplete state. The current implementation tries to signal that situation by resetting CacheInfo->Pair at line 1104, but that doesn't actually help, since later code ignores this invalidation and relies on the 'Cache->empty()' property to decide on cache completeness.
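To make the failure mode concrete, below is a minimal standalone C++ model of the interaction just described. It is a sketch under simplified assumptions (a plain map for the cache, a set of block ids for the visited nodes, invented names throughout); only the shape of the bug and of the fix is taken from the patch, nothing here is the actual MemoryDependenceAnalysis code.

#include <cstdio>
#include <map>
#include <set>

int main() {
  std::map<int, int> Cache; // Models CacheInfo->NonLocalDeps.
  std::set<int> Visited;    // Models the per-query Visited map.

  // First pass: the query with TBAA visits blocks 0..2 and caches a
  // dependence result for each of them.
  for (int BB = 0; BB < 3; ++BB) {
    Visited.insert(BB);
    Cache[BB] = BB; // Stand-in for a real dependence result.
  }

  // A query with mismatching (or no) TBAA arrives: the shared cache entry
  // is cleared and the query restarts, but Visited is NOT cleared.
  Cache.clear();
  bool IsIncomplete = true; // What the patch records at this point.

  // Restarted pass without TBAA: every block is already in Visited, so no
  // result ever makes it back into the cache.
  for (int BB = 0; BB < 3; ++BB)
    if (Visited.insert(BB).second)
      Cache[BB] = BB;

  // Pre-patch, completeness was decided from Cache->empty() alone, so the
  // freshly cleared cache was wrongly marked complete; post-patch the
  // explicit IsIncomplete flag blocks that.
  bool CompleteBefore = Cache.empty();                 // true - wrong!
  bool CompleteAfter = !IsIncomplete && Cache.empty(); // false - correct.
  std::printf("cached %zu of 3 blocks, before=%d, after=%d\n", Cache.size(),
              (int)CompleteBefore, (int)CompleteAfter);
  return 0;
}

Run as written, this prints "cached 0 of 3 blocks, before=1, after=0": the cache holds no results for the three visited blocks, yet the old empty-cache test would have stamped it complete.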
Reviewers: reames, hfinkel, chandlerc, fedor.sergeev, asbirlea, fhahn, john.brawn, Prazek, sunfish
Reviewed By: john.brawn
Subscribers: DaniilSuchkov, kosarev, jfb, dantrushin, hiraditya, bmahjour, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73032
Added:
llvm/test/Analysis/MemoryDependenceAnalysis/memdep_with_tbaa.ll
Modified:
llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 6869ff2aafa5..f69adc2d9b77 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -478,7 +478,8 @@ class MemoryDependenceResults {
BasicBlock *BB,
SmallVectorImpl<NonLocalDepResult> &Result,
DenseMap<BasicBlock *, Value *> &Visited,
- bool SkipFirstBlock = false);
+ bool SkipFirstBlock = false,
+ bool IsIncomplete = false);
MemDepResult GetNonLocalInfoForBlock(Instruction *QueryInst,
const MemoryLocation &Loc, bool isLoad,
BasicBlock *BB, NonLocalDepInfo *Cache,
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2b01dc43a687..dacc5a87a337 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1004,7 +1004,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
Instruction *QueryInst, const PHITransAddr &Pointer,
const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
SmallVectorImpl<NonLocalDepResult> &Result,
- DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
+ DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
+ bool IsIncomplete) {
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
@@ -1048,12 +1049,16 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
if (Instruction *Inst = Entry.getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
+ // The cache is cleared (in the above line) so we will have lost
+ // information about blocks we have already visited. We therefore must
+ // assume that the cache information is incomplete.
+ IsIncomplete = true;
} else {
// This query's Size is less than the cached one. Conservatively restart
// the query using the greater size.
return getNonLocalPointerDepFromBB(
QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
- StartBB, Result, Visited, SkipFirstBlock);
+ StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
}
}
@@ -1068,11 +1073,15 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
if (Instruction *Inst = Entry.getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
+ // The cache is cleared (in the above line) so we will have lost
+ // information about blocks we have already visited. We therefore must
+ // assume that the cache information is incomplete.
+ IsIncomplete = true;
}
if (Loc.AATags)
return getNonLocalPointerDepFromBB(
QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
- Visited, SkipFirstBlock);
+ Visited, SkipFirstBlock, IsIncomplete);
}
}
@@ -1080,7 +1089,11 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we have valid cached information for exactly the block we are
// investigating, just return it with no recomputation.
- if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ // Don't use the cached information if it is known to be incomplete, since
+ // intermediate results may have been discarded when the cache was cleared.
+ //
+ if (!IsIncomplete &&
+ CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
// We have a fully cached result for this query then we can just return the
// cached results and populate the visited set. However, we have to verify
// that we don't already have conflicting results for these blocks. Check
@@ -1117,10 +1130,10 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
}
// Otherwise, either this is a new block, a block with an invalid cache
- // pointer or one that we're about to invalidate by putting more info into it
- // than its valid cache info. If empty, the result will be valid cache info,
- // otherwise it isn't.
- if (Cache->empty())
+ // pointer or one that we're about to invalidate by putting more info into
+ // it than its valid cache info. If empty and not explicitly indicated as
+ // incomplete, the result will be valid cache info, otherwise it isn't.
+ if (!IsIncomplete && Cache->empty())
CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
else
CacheInfo->Pair = BBSkipFirstBlockPair();
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_with_tbaa.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_with_tbaa.ll
new file mode 100644
index 000000000000..f1427ff53291
--- /dev/null
+++ b/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_with_tbaa.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -aa-pipeline=basic-aa -passes=gvn -S < %s | FileCheck %s
+
+; This test catches an issue in the MemoryDependenceAnalysis caching mechanism in the presence of TBAA.
+define i64 @foo(i64 addrspace(1)** %arg, i1 %arg1, i1 %arg2, i1 %arg3, i32 %arg4) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = load atomic i64 addrspace(1)*, i64 addrspace(1)** [[ARG:%.*]] unordered, align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP]], i64 8
+; CHECK-NEXT: store atomic i64 0, i64 addrspace(1)* [[TMP5]] unordered, align 8
+; CHECK-NEXT: br label [[BB6:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[TMP22:%.*]], [[BB19:%.*]] ]
+; CHECK-NEXT: br i1 [[ARG1:%.*]], label [[BB19]], label [[BB8:%.*]]
+; CHECK: bb8:
+; CHECK-NEXT: [[TMP9:%.*]] = load atomic i64 addrspace(1)*, i64 addrspace(1)** [[ARG]] unordered, align 8
+; CHECK-NEXT: br i1 [[ARG2:%.*]], label [[BB11:%.*]], label [[BB10:%.*]]
+; CHECK: bb10:
+; CHECK-NEXT: br label [[BB15:%.*]]
+; CHECK: bb11:
+; CHECK-NEXT: br i1 [[ARG3:%.*]], label [[BB12:%.*]], label [[BB18:%.*]]
+; CHECK: bb12:
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP9]], i64 8
+; CHECK-NEXT: store atomic i64 1, i64 addrspace(1)* [[TMP14]] unordered, align 8
+; CHECK-NEXT: ret i64 0
+; CHECK: bb15:
+; CHECK-NEXT: [[TMP16:%.*]] = phi i64 addrspace(1)* [ [[TMP9]], [[BB10]] ], [ [[TMP27:%.*]], [[BB26:%.*]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ [[TMP7]], [[BB10]] ], [ 0, [[BB26]] ]
+; CHECK-NEXT: switch i32 [[ARG4:%.*]], label [[BB19]] [
+; CHECK-NEXT: i32 0, label [[BB26]]
+; CHECK-NEXT: i32 1, label [[BB23:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb18:
+; CHECK-NEXT: br label [[BB19]]
+; CHECK: bb19:
+; CHECK-NEXT: [[TMP20:%.*]] = phi i64 addrspace(1)* [ [[TMP16]], [[BB15]] ], [ inttoptr (i64 1 to i64 addrspace(1)*), [[BB6]] ], [ [[TMP9]], [[BB18]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP20]], i64 8
+; CHECK-NEXT: [[TMP22]] = load atomic i64, i64 addrspace(1)* [[TMP21]] unordered, align 8, !tbaa !0
+; CHECK-NEXT: br label [[BB6]]
+; CHECK: bb23:
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP16]], i64 8
+; CHECK-NEXT: [[TMP25:%.*]] = load atomic i64, i64 addrspace(1)* [[TMP24]] unordered, align 8
+; CHECK-NEXT: call void @baz(i64 [[TMP25]]) #0
+; CHECK-NEXT: ret i64 0
+; CHECK: bb26:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: [[TMP27]] = load atomic i64 addrspace(1)*, i64 addrspace(1)** [[ARG]] unordered, align 8
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP27]], i64 8
+; CHECK-NEXT: [[TMP29:%.*]] = load atomic i64, i64 addrspace(1)* [[TMP28]] unordered, align 8
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP27]], i64 40
+; CHECK-NEXT: store atomic i64 [[TMP29]], i64 addrspace(1)* [[TMP30]] unordered, align 4
+; CHECK-NEXT: br label [[BB15]]
+;
+bb:
+ %tmp = load atomic i64 addrspace(1)*, i64 addrspace(1)** %arg unordered, align 8
+ %tmp5 = getelementptr inbounds i64, i64 addrspace(1)* %tmp, i64 8
+ store atomic i64 0, i64 addrspace(1)* %tmp5 unordered, align 8
+ br label %bb6
+
+bb6: ; preds = %bb19, %bb
+ %tmp7 = phi i64 [ 0, %bb ], [ %tmp22, %bb19 ]
+ %tmp111 = inttoptr i64 1 to i64 addrspace(1)*
+ br i1 %arg1, label %bb19, label %bb8
+
+bb8: ; preds = %bb6
+ %tmp9 = load atomic i64 addrspace(1)*, i64 addrspace(1)** %arg unordered, align 8
+ br i1 %arg2, label %bb11, label %bb10
+
+bb10: ; preds = %bb8
+ br label %bb15
+
+bb11: ; preds = %bb8
+ br i1 %arg3, label %bb12, label %bb18
+
+bb12: ; preds = %bb11
+ %tmp13 = phi i64 addrspace(1)* [ %tmp9, %bb11 ]
+ %tmp14 = getelementptr inbounds i64, i64 addrspace(1)* %tmp13, i64 8
+ store atomic i64 1, i64 addrspace(1)* %tmp14 unordered, align 8
+ ret i64 0
+
+bb15: ; preds = %bb26, %bb10
+ %tmp16 = phi i64 addrspace(1)* [ %tmp9, %bb10 ], [ %tmp27, %bb26 ]
+ %tmp17 = phi i64 [ %tmp7, %bb10 ], [ 0, %bb26 ]
+ switch i32 %arg4, label %bb19 [
+ i32 0, label %bb26
+ i32 1, label %bb23
+ ]
+
+bb18: ; preds = %bb11
+ br label %bb19
+
+bb19: ; preds = %bb18, %bb15, %bb6
+ %tmp20 = phi i64 addrspace(1)* [ %tmp16, %bb15 ], [ %tmp111, %bb6 ], [ %tmp9, %bb18 ]
+ %tmp21 = getelementptr inbounds i64, i64 addrspace(1)* %tmp20, i64 8
+ %tmp22 = load atomic i64, i64 addrspace(1)* %tmp21 unordered, align 8, !tbaa !0
+ br label %bb6
+
+bb23: ; preds = %bb15
+ %tmp24 = getelementptr inbounds i64, i64 addrspace(1)* %tmp16, i64 8
+ %tmp25 = load atomic i64, i64 addrspace(1)* %tmp24 unordered, align 8
+ call void @baz(i64 %tmp25) #0
+ ret i64 0
+
+bb26: ; preds = %bb15
+ call void @bar()
+ %tmp27 = load atomic i64 addrspace(1)*, i64 addrspace(1)** %arg unordered, align 8
+ %tmp28 = getelementptr inbounds i64, i64 addrspace(1)* %tmp27, i64 8
+ %tmp29 = load atomic i64, i64 addrspace(1)* %tmp28 unordered, align 8
+ %tmp30 = getelementptr inbounds i64, i64 addrspace(1)* %tmp27, i64 40
+ store atomic i64 %tmp29, i64 addrspace(1)* %tmp30 unordered, align 4
+ br label %bb15
+}
+
+declare void @bar()
+
+; Function Attrs: inaccessiblememonly readonly
+declare void @baz(i64) #0
+
+attributes #0 = { inaccessiblememonly readonly }
+
+!0 = !{!1, !2, i64 8}
+!1 = !{!"Name", !2, i64 8}
+!2 = !{!"tbaa_local_fields", !3, i64 0}
+!3 = !{!"tbaa-access-type"}
+