[llvm] cae643d - Reverting D73027 [DependenceAnalysis] Dependencies for loads marked with "invariant.load" should not be shared with general accesses (PR42151).
Evgeniy Brevnov via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 14 07:57:47 PST 2020
Author: Evgeniy Brevnov
Date: 2020-02-14T22:57:23+07:00
New Revision: cae643d596918e5a7cb00287c572099fb04715bd
URL: https://github.com/llvm/llvm-project/commit/cae643d596918e5a7cb00287c572099fb04715bd
DIFF: https://github.com/llvm/llvm-project/commit/cae643d596918e5a7cb00287c572099fb04715bd.diff
LOG: Reverting D73027 [DependenceAnalysis] Dependencies for loads marked with "invariant.load" should not be shared with general accesses (PR42151).
Added:
Modified:
llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
Removed:
llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
################################################################################
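For context: a load tagged with "!invariant.load" metadata asserts that the
location it reads holds the same value at every point where the load could
execute, so memory dependence analysis may look through writes that would
clobber an ordinary load. The patch being reverted therefore kept memdep
results for such loads out of the shared non-local cache, since a result
computed under that assumption is not valid for a general access to the same
address. A minimal sketch of the semantics (illustrative only, not part of
this commit; @example and @clobber are hypothetical names, with @clobber
assumed to possibly write through %p):

declare void @clobber(i8*)

define i8 @example(i8* %p) {
entry:
  %a = load i8, i8* %p
  call void @clobber(i8* %p)
  ; An ordinary load here would depend on the call above. With
  ; !invariant.load, memdep may report no dependency on the call, and GVN
  ; may simply forward %a into %b.
  %b = load i8, i8* %p, !invariant.load !0
  %s = add i8 %a, %b
  ret i8 %s
}

!0 = !{}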
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index ce9944a5ce4b..450595cac57b 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -902,11 +902,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
- bool isInvariantLoad = false;
-
- if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
- isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
-
// Do a binary search to see if we already have an entry for this block in
// the cache set. If so, find it.
NonLocalDepInfo::iterator Entry = std::upper_bound(
@@ -918,13 +913,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
ExistingResult = &*Entry;
- // Use cached result for invariant load only if there is no dependency for non
- // invariant load. In this case invariant load can not have any dependency as
- // well.
- if (ExistingResult && isInvariantLoad &&
- !ExistingResult->getResult().isNonFuncLocal())
- ExistingResult = nullptr;
-
// If we have a cached entry, and it is non-dirty, use it as the value for
// this dependency.
if (ExistingResult && !ExistingResult->getResult().isDirty()) {
@@ -953,10 +941,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
MemDepResult Dep =
getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
- // Don't cache results for invariant load.
- if (isInvariantLoad)
- return Dep;
-
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
@@ -1045,10 +1029,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
InitialNLPI.Size = Loc.Size;
InitialNLPI.AATags = Loc.AATags;
- bool isInvariantLoad = false;
- if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
- isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
-
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
@@ -1057,8 +1037,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we already have a cache entry for this CacheKey, we may need to do some
// work to reconcile the cache entry and the current query.
- // Invariant loads don't participate in caching. Thus no need to reconcile.
- if (!isInvariantLoad && !Pair.second) {
+ if (!Pair.second) {
if (CacheInfo->Size != Loc.Size) {
bool ThrowOutEverything;
if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
@@ -1114,10 +1093,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we have valid cached information for exactly the block we are
// investigating, just return it with no recomputation.
- // Don't use cached information for invariant loads since it is valid for
- // non-invariant loads only.
- if (!isInvariantLoad &&
- CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+ if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
// We have a fully cached result for this query then we can just return the
// cached results and populate the visited set. However, we have to verify
// that we don't already have conflicting results for these blocks. Check
@@ -1153,18 +1129,14 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
return true;
}
- // Invariant loads don't affect cache in any way thus no need to update
- // CacheInfo as well.
- if (!isInvariantLoad) {
- // Otherwise, either this is a new block, a block with an invalid cache
- // pointer or one that we're about to invalidate by putting more info into
- // it than its valid cache info. If empty, the result will be valid cache
- // info, otherwise it isn't.
- if (Cache->empty())
- CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
- else
- CacheInfo->Pair = BBSkipFirstBlockPair();
- }
+ // Otherwise, either this is a new block, a block with an invalid cache
+ // pointer or one that we're about to invalidate by putting more info into it
+ // than its valid cache info. If empty, the result will be valid cache info,
+ // otherwise it isn't.
+ if (Cache->empty())
+ CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+ else
+ CacheInfo->Pair = BBSkipFirstBlockPair();
SmallVector<BasicBlock *, 32> Worklist;
Worklist.push_back(StartBB);
@@ -1405,26 +1377,22 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
if (SkipFirstBlock)
return false;
- // Results of invariant loads are not cached thus no need to update cached
- // information.
- if (!isInvariantLoad) {
- bool foundBlock = false;
- for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
- if (I.getBB() != BB)
- continue;
+ bool foundBlock = false;
+ for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
+ if (I.getBB() != BB)
+ continue;
- assert((GotWorklistLimit || I.getResult().isNonLocal() ||
- !DT.isReachableFromEntry(BB)) &&
- "Should only be here with transparent block");
- foundBlock = true;
- I.setResult(MemDepResult::getUnknown());
- Result.push_back(
- NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
- break;
- }
- (void)foundBlock; (void)GotWorklistLimit;
- assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
+ assert((GotWorklistLimit || I.getResult().isNonLocal() ||
+ !DT.isReachableFromEntry(BB)) &&
+ "Should only be here with transparent block");
+ foundBlock = true;
+ I.setResult(MemDepResult::getUnknown());
+ Result.push_back(
+ NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
+ break;
}
+ (void)foundBlock; (void)GotWorklistLimit;
+ assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
}
// Okay, we're done now. If we added new values to the cache, re-sort it.
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
deleted file mode 100644
index 152cb175ef60..000000000000
--- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
+++ /dev/null
@@ -1,173 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -gvn -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.memset.p0i8.i8(i8*, i8, i32, i1)
-declare void @foo(i8*)
-
-define i8 @test(i1 %cmp) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P:%.*]] = alloca i8
-; CHECK-NEXT: store i8 5, i8* [[P]]
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[I_INC:%.*]], [[ALIVE]] ]
-; CHECK-NEXT: br i1 [[CMP:%.*]], label [[ALIVE]], label [[DEAD:%.*]]
-; CHECK: dead:
-; CHECK-NEXT: call void @foo(i8* [[P]])
-; CHECK-NEXT: [[I_1:%.*]] = add i8 [[I]], [[V]]
-; CHECK-NEXT: br label [[ALIVE]]
-; CHECK: alive:
-; CHECK-NEXT: [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ]
-; CHECK-NEXT: store i8 -5, i8* [[P]]
-; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[P]], i8 0, i32 1, i1 false)
-; CHECK-NEXT: [[I_INC]] = add i8 [[I_2]], 1
-; CHECK-NEXT: [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100
-; CHECK-NEXT: br i1 [[CMP_LOOP]], label [[EXIT:%.*]], label [[HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: ret i8 0
-;
-
-entry:
- %p = alloca i8
- %addr = getelementptr inbounds i8, i8* %p, i64 0
- store i8 5, i8* %addr
- br label %header
-header:
- %i = phi i8 [0, %entry], [%i.inc, %backedge]
- br i1 %cmp, label %alive, label %dead
-dead:
- call void @foo(i8* %p)
- %v = load i8, i8* %addr, !invariant.load !1
- %i.1 = add i8 %i, %v
- br label %alive
-alive:
- %i.2 = phi i8 [%i, %header], [%i.1, %dead]
- store i8 -5, i8* %addr
- br label %backedge
-backedge:
- call void @llvm.memset.p0i8.i8(i8 * align 1 %p, i8 0, i32 1, i1 false)
- %i.inc = add i8 %i.2, 1
- %cmp.loop = icmp ugt i8 %i.inc, 100
- br i1 %cmp.loop, label %exit, label %header
-exit:
- %res = load i8, i8* %addr
- ret i8 %res
-}
-
-; Check that first two loads are not optimized out while the one marked with
-; invariant.load reuses %res1
-define i8 @test2(i1 %cmp, i8 *%p) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RES1:%.*]] = load i8, i8* [[P:%.*]]
-; CHECK-NEXT: call void @foo(i8* [[P]])
-; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B2:%.*]], label [[B1:%.*]]
-; CHECK: b1:
-; CHECK-NEXT: [[RES2:%.*]] = load i8, i8* [[P]]
-; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
-; CHECK-NEXT: br label [[ALIVE:%.*]]
-; CHECK: b2:
-; CHECK-NEXT: [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
-; CHECK-NEXT: br label [[ALIVE]]
-; CHECK: alive:
-; CHECK-NEXT: [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
-; CHECK-NEXT: ret i8 [[RES_PHI]]
-;
-
-entry:
- %res1 = load i8, i8* %p
- call void @foo(i8 *%p)
- br i1 %cmp, label %b2, label %b1
-b1:
- %res2 = load i8, i8* %p
- %res3 = add i8 %res1, %res2
- br label %alive
-b2:
- %v = load i8, i8* %p, !invariant.load !1
- %res.dead = add i8 %v, %res1
- br label %alive
-alive:
- %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
- ret i8 %res.phi
-}
-
-; This is essentially the same test case as the above one but with %b1 and %b2
-; swapped in "br i1 %cmp, label %b1, label %b2" instruction. That helps us to
-; ensure that results doesn't depend on visiting order.
-define i8 @test3(i1 %cmp, i8 *%p) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RES1:%.*]] = load i8, i8* [[P:%.*]]
-; CHECK-NEXT: call void @foo(i8* [[P]])
-; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]]
-; CHECK: b1:
-; CHECK-NEXT: [[RES2:%.*]] = load i8, i8* [[P]]
-; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
-; CHECK-NEXT: br label [[ALIVE:%.*]]
-; CHECK: b2:
-; CHECK-NEXT: [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
-; CHECK-NEXT: br label [[ALIVE]]
-; CHECK: alive:
-; CHECK-NEXT: [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
-; CHECK-NEXT: ret i8 [[RES_PHI]]
-;
-entry:
- %res1 = load i8, i8* %p
- call void @foo(i8 *%p)
- br i1 %cmp, label %b1, label %b2
-b1:
- %res2 = load i8, i8* %p
- %res3 = add i8 %res1, %res2
- br label %alive
-b2:
- %v = load i8, i8* %p, !invariant.load !1
- %res.dead = add i8 %v, %res1
- br label %alive
-alive:
- %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
- ret i8 %res.phi
-}
-
-
-; This is reduced test case catching regression in the first version of the
-; fix for invariant loads (https://reviews.llvm.org/D64405).
-define void @test4() {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load float, float* inttoptr (i64 8 to float*), align 4
-; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], [[TMP0]]
-; CHECK-NEXT: br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER:%.*]]
-; CHECK: fusion.loop_header.dim.1.preheader:
-; CHECK-NEXT: [[TMP2:%.*]] = phi float [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
-; CHECK-NEXT: [[FUSION_INVAR_ADDRESS_DIM_0_03:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INVAR_INC3:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], i64 0, i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[TMP2]], [[TMP2]]
-; CHECK-NEXT: [[INVAR_INC3]] = add nuw nsw i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], 1
-; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[INVAR_INC3]], i64 0, i64 2
-; CHECK-NEXT: [[DOTPRE]] = load float, float* [[DOTPHI_TRANS_INSERT]], align 4, !invariant.load !0
-; CHECK-NEXT: br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]]
-;
-entry:
- %0 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 0, i64 0, i64 2
- %1 = load float, float* %0, align 4
- %2 = fmul float %1, %1
- br label %fusion.loop_header.dim.1.preheader
-
-fusion.loop_header.dim.1.preheader: ; preds = %fusion.loop_header.dim.1.preheader, %entry
- %fusion.invar_address.dim.0.03 = phi i64 [ 0, %entry ], [ %invar.inc3, %fusion.loop_header.dim.1.preheader ]
- %3 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
- %4 = load float, float* %3, align 4, !invariant.load !1
- %5 = fmul float %4, %4
- %6 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
- %7 = load float, float* %6, align 4, !invariant.load !1
- %8 = fmul float %7, %7
- %invar.inc3 = add nuw nsw i64 %fusion.invar_address.dim.0.03, 1
- br label %fusion.loop_header.dim.1.preheader
-}
-
-!1 = !{}
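The deleted @test2 above captures the cache-sharing hazard from PR42151 most
directly: the !invariant.load in %b2 may legally reuse %res1 across the call
to @foo, but the "no dependency" result memdep computes for it must not land
in the shared non-local cache, or the ordinary load %res2 in %b1 would pick
up the same answer and be wrongly folded into %res1 as well. A distilled,
self-contained sketch of the intended outcome, reusing @test2's names
(@sketch is a hypothetical wrapper; @foo is assumed to possibly write
through %p, as in the test):

define i8 @sketch(i1 %cmp, i8* %p) {
entry:
  %res1 = load i8, i8* %p
  call void @foo(i8* %p)           ; clobbers %p for ordinary loads
  br i1 %cmp, label %b2, label %b1
b1:
  %res2 = load i8, i8* %p          ; must remain: it depends on the call
  %res3 = add i8 %res1, %res2
  br label %alive
b2:
  ; May be replaced by %res1: the location is asserted invariant here.
  %v = load i8, i8* %p, !invariant.load !1
  %res.dead = add i8 %v, %res1
  br label %alive
alive:
  %res.phi = phi i8 [ %res3, %b1 ], [ %res.dead, %b2 ]
  ret i8 %res.phi
}

declare void @foo(i8*)

!1 = !{}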