[llvm] cae643d - Reverting D73027 [DependenceAnalysis] Dependencies for loads marked with "invariant.load" should not be shared with general accesses (PR42151).

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 14 07:59:51 PST 2020


On Fri, Feb 14, 2020 at 6:57 PM Evgeniy Brevnov via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Evgeniy Brevnov
> Date: 2020-02-14T22:57:23+07:00
> New Revision: cae643d596918e5a7cb00287c572099fb04715bd
>
> URL: https://github.com/llvm/llvm-project/commit/cae643d596918e5a7cb00287c572099fb04715bd
> DIFF: https://github.com/llvm/llvm-project/commit/cae643d596918e5a7cb00287c572099fb04715bd.diff
>
> LOG: Reverting D73027 [DependenceAnalysis] Dependencies for loads marked with "invariant.load" should not be shared with general accesses (PR42151).
>
> Added:
>
>
> Modified:
>     llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
>
> Removed:
>     llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
>
>
> ################################################################################
> diff  --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
> index ce9944a5ce4b..450595cac57b 100644
> --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
> +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
> @@ -902,11 +902,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
>      Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
>      BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
>
> -  bool isInvariantLoad = false;
> -
> -  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
> -    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
> -
>    // Do a binary search to see if we already have an entry for this block in
>    // the cache set.  If so, find it.
>    NonLocalDepInfo::iterator Entry = std::upper_bound(
> @@ -918,13 +913,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
>    if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
>      ExistingResult = &*Entry;
>
> -  // Use cached result for invariant load only if there is no dependency for non
> -  // invariant load. In this case invariant load can not have any dependency as
> -  // well.
> -  if (ExistingResult && isInvariantLoad &&
> -      !ExistingResult->getResult().isNonFuncLocal())
> -    ExistingResult = nullptr;
> -
>    // If we have a cached entry, and it is non-dirty, use it as the value for
>    // this dependency.
>    if (ExistingResult && !ExistingResult->getResult().isDirty()) {
> @@ -953,10 +941,6 @@ MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
>    MemDepResult Dep =
>        getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
>
> -  // Don't cache results for invariant load.
> -  if (isInvariantLoad)
> -    return Dep;
> -
>    // If we had a dirty entry for the block, update it.  Otherwise, just add
>    // a new entry.
>    if (ExistingResult)
> @@ -1045,10 +1029,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
>    InitialNLPI.Size = Loc.Size;
>    InitialNLPI.AATags = Loc.AATags;
>
> -  bool isInvariantLoad = false;
> -  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
> -    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
> -
>    // Get the NLPI for CacheKey, inserting one into the map if it doesn't
>    // already have one.
>    std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
> @@ -1057,8 +1037,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
>
>    // If we already have a cache entry for this CacheKey, we may need to do some
>    // work to reconcile the cache entry and the current query.
> -  // Invariant loads don't participate in caching. Thus no need to reconcile.
> -  if (!isInvariantLoad && !Pair.second) {
> +  if (!Pair.second) {
>      if (CacheInfo->Size != Loc.Size) {
>        bool ThrowOutEverything;
>        if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
> @@ -1114,10 +1093,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
>
>    // If we have valid cached information for exactly the block we are
>    // investigating, just return it with no recomputation.
> -  // Don't use cached information for invariant loads since it is valid for
> -  // non-invariant loads only.
> -  if (!isInvariantLoad &&
> -      CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
> +  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
>      // We have a fully cached result for this query then we can just return the
>      // cached results and populate the visited set.  However, we have to verify
>      // that we don't already have conflicting results for these blocks.  Check
> @@ -1153,18 +1129,14 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
>      return true;
>    }
>
> -  // Invariant loads don't affect cache in any way thus no need to update
> -  // CacheInfo as well.
> -  if (!isInvariantLoad) {
> -    // Otherwise, either this is a new block, a block with an invalid cache
> -    // pointer or one that we're about to invalidate by putting more info into
> -    // it than its valid cache info.  If empty, the result will be valid cache
> -    // info, otherwise it isn't.
> -    if (Cache->empty())
> -      CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
> -    else
> -      CacheInfo->Pair = BBSkipFirstBlockPair();
> -  }
> +  // Otherwise, either this is a new block, a block with an invalid cache
> +  // pointer or one that we're about to invalidate by putting more info into it
> +  // than its valid cache info.  If empty, the result will be valid cache info,
> +  // otherwise it isn't.
> +  if (Cache->empty())
> +    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
> +  else
> +    CacheInfo->Pair = BBSkipFirstBlockPair();
>
>    SmallVector<BasicBlock *, 32> Worklist;
>    Worklist.push_back(StartBB);
> @@ -1405,26 +1377,22 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
>      if (SkipFirstBlock)
>        return false;
>
> -    // Results of invariant loads are not cached thus no need to update cached
> -    // information.
> -    if (!isInvariantLoad) {
> -      bool foundBlock = false;
> -      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
> -        if (I.getBB() != BB)
> -          continue;
> +    bool foundBlock = false;
> +    for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
> +      if (I.getBB() != BB)
> +        continue;
>
> -        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
> -                !DT.isReachableFromEntry(BB)) &&
> -               "Should only be here with transparent block");
> -        foundBlock = true;
> -        I.setResult(MemDepResult::getUnknown());
> -        Result.push_back(
> -            NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
> -        break;
> -      }
> -      (void)foundBlock; (void)GotWorklistLimit;
> -      assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
> +      assert((GotWorklistLimit || I.getResult().isNonLocal() ||
> +              !DT.isReachableFromEntry(BB)) &&
> +             "Should only be here with transparent block");
> +      foundBlock = true;
> +      I.setResult(MemDepResult::getUnknown());
> +      Result.push_back(
> +          NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
> +      break;
>      }
> +    (void)foundBlock; (void)GotWorklistLimit;
> +    assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
>    }
>
>    // Okay, we're done now.  If we added new values to the cache, re-sort it.
>
> diff  --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
> deleted file mode 100644
> index 152cb175ef60..000000000000
> --- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
> +++ /dev/null
> @@ -1,173 +0,0 @@
> -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
Such tests really should be precommitted and stay in-tree regardless
of whether the patch is reverted, and the patch itself should only
contain changes to the test check lines
(unless, of course, the old code crashed).

> -; RUN: opt < %s -gvn -S | FileCheck %s
> -
> -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
> -target triple = "x86_64-unknown-linux-gnu"
> -
> -declare void @llvm.memset.p0i8.i8(i8*, i8, i32, i1)
> -declare void @foo(i8*)
> -
> -define i8 @test(i1 %cmp) {
> -; CHECK-LABEL: @test(
> -; CHECK-NEXT:  entry:
> -; CHECK-NEXT:    [[P:%.*]] = alloca i8
> -; CHECK-NEXT:    store i8 5, i8* [[P]]
> -; CHECK-NEXT:    br label [[HEADER:%.*]]
> -; CHECK:       header:
> -; CHECK-NEXT:    [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ]
> -; CHECK-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[I_INC:%.*]], [[ALIVE]] ]
> -; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[ALIVE]], label [[DEAD:%.*]]
> -; CHECK:       dead:
> -; CHECK-NEXT:    call void @foo(i8* [[P]])
> -; CHECK-NEXT:    [[I_1:%.*]] = add i8 [[I]], [[V]]
> -; CHECK-NEXT:    br label [[ALIVE]]
> -; CHECK:       alive:
> -; CHECK-NEXT:    [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ]
> -; CHECK-NEXT:    store i8 -5, i8* [[P]]
> -; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* align 1 [[P]], i8 0, i32 1, i1 false)
> -; CHECK-NEXT:    [[I_INC]] = add i8 [[I_2]], 1
> -; CHECK-NEXT:    [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100
> -; CHECK-NEXT:    br i1 [[CMP_LOOP]], label [[EXIT:%.*]], label [[HEADER]]
> -; CHECK:       exit:
> -; CHECK-NEXT:    ret i8 0
> -;
> -
> -entry:
> -  %p = alloca i8
> -  %addr = getelementptr inbounds i8, i8* %p, i64 0
> -  store i8 5, i8* %addr
> -  br label %header
> -header:
> -  %i = phi i8 [0, %entry], [%i.inc, %backedge]
> -  br i1 %cmp, label %alive, label %dead
> -dead:
> -  call void @foo(i8* %p)
> -  %v = load i8, i8* %addr, !invariant.load !1
> -  %i.1 = add i8 %i, %v
> -  br label %alive
> -alive:
> -  %i.2 = phi i8 [%i, %header], [%i.1, %dead]
> -  store i8 -5, i8* %addr
> -  br label %backedge
> -backedge:
> -  call void @llvm.memset.p0i8.i8(i8 * align 1 %p, i8 0, i32 1, i1 false)
> -  %i.inc = add i8 %i.2, 1
> -  %cmp.loop = icmp ugt i8 %i.inc, 100
> -  br i1 %cmp.loop, label %exit, label %header
> -exit:
> -  %res = load i8, i8* %addr
> -  ret i8 %res
> -}
> -
> -; Check that first two loads are not optimized out while the one marked with
> -; invariant.load reuses %res1
> -define i8 @test2(i1 %cmp, i8 *%p) {
> -; CHECK-LABEL: @test2(
> -; CHECK-NEXT:  entry:
> -; CHECK-NEXT:    [[RES1:%.*]] = load i8, i8* [[P:%.*]]
> -; CHECK-NEXT:    call void @foo(i8* [[P]])
> -; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[B2:%.*]], label [[B1:%.*]]
> -; CHECK:       b1:
> -; CHECK-NEXT:    [[RES2:%.*]] = load i8, i8* [[P]]
> -; CHECK-NEXT:    [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
> -; CHECK-NEXT:    br label [[ALIVE:%.*]]
> -; CHECK:       b2:
> -; CHECK-NEXT:    [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
> -; CHECK-NEXT:    br label [[ALIVE]]
> -; CHECK:       alive:
> -; CHECK-NEXT:    [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
> -; CHECK-NEXT:    ret i8 [[RES_PHI]]
> -;
> -
> -entry:
> -  %res1 = load i8, i8* %p
> -  call void @foo(i8 *%p)
> -  br i1 %cmp, label %b2, label %b1
> -b1:
> -  %res2 = load i8, i8* %p
> -  %res3 = add i8 %res1, %res2
> -  br label %alive
> -b2:
> -  %v = load i8, i8* %p, !invariant.load !1
> -  %res.dead = add i8 %v, %res1
> -  br label %alive
> -alive:
> -  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
> -  ret i8 %res.phi
> -}
> -
> -; This is essentially the same test case as the above one but with %b1 and %b2
> -; swapped in "br i1 %cmp, label %b1, label %b2" instruction. That helps us to
> -; ensure that results doesn't depend on visiting order.
> -define i8 @test3(i1 %cmp, i8 *%p) {
> -; CHECK-LABEL: @test3(
> -; CHECK-NEXT:  entry:
> -; CHECK-NEXT:    [[RES1:%.*]] = load i8, i8* [[P:%.*]]
> -; CHECK-NEXT:    call void @foo(i8* [[P]])
> -; CHECK-NEXT:    br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]]
> -; CHECK:       b1:
> -; CHECK-NEXT:    [[RES2:%.*]] = load i8, i8* [[P]]
> -; CHECK-NEXT:    [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
> -; CHECK-NEXT:    br label [[ALIVE:%.*]]
> -; CHECK:       b2:
> -; CHECK-NEXT:    [[RES_DEAD:%.*]] = add i8 [[RES1]], [[RES1]]
> -; CHECK-NEXT:    br label [[ALIVE]]
> -; CHECK:       alive:
> -; CHECK-NEXT:    [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
> -; CHECK-NEXT:    ret i8 [[RES_PHI]]
> -;
> -entry:
> -  %res1 = load i8, i8* %p
> -  call void @foo(i8 *%p)
> -  br i1 %cmp, label %b1, label %b2
> -b1:
> -  %res2 = load i8, i8* %p
> -  %res3 = add i8 %res1, %res2
> -  br label %alive
> -b2:
> -  %v = load i8, i8* %p, !invariant.load !1
> -  %res.dead = add i8 %v, %res1
> -  br label %alive
> -alive:
> -  %res.phi = phi i8 [%res3, %b1], [%res.dead, %b2]
> -  ret i8 %res.phi
> -}
> -
> -
> -; This is reduced test case catching regression in the first version of the
> -; fix for invariant loads (https://reviews.llvm.org/D64405).
> -define void @test4() {
> -; CHECK-LABEL: @test4(
> -; CHECK-NEXT:  entry:
> -; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* inttoptr (i64 8 to float*), align 4
> -; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[TMP0]], [[TMP0]]
> -; CHECK-NEXT:    br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER:%.*]]
> -; CHECK:       fusion.loop_header.dim.1.preheader:
> -; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
> -; CHECK-NEXT:    [[FUSION_INVAR_ADDRESS_DIM_0_03:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INVAR_INC3:%.*]], [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ]
> -; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], i64 0, i64 2
> -; CHECK-NEXT:    [[TMP4:%.*]] = fmul float [[TMP2]], [[TMP2]]
> -; CHECK-NEXT:    [[INVAR_INC3]] = add nuw nsw i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], 1
> -; CHECK-NEXT:    [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 [[INVAR_INC3]], i64 0, i64 2
> -; CHECK-NEXT:    [[DOTPRE]] = load float, float* [[DOTPHI_TRANS_INSERT]], align 4, !invariant.load !0
> -; CHECK-NEXT:    br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]]
> -;
> -entry:
> -  %0 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 0, i64 0, i64 2
> -  %1 = load float, float* %0, align 4
> -  %2 = fmul float %1, %1
> -  br label %fusion.loop_header.dim.1.preheader
> -
> -fusion.loop_header.dim.1.preheader:               ; preds = %fusion.loop_header.dim.1.preheader, %entry
> -  %fusion.invar_address.dim.0.03 = phi i64 [ 0, %entry ], [ %invar.inc3, %fusion.loop_header.dim.1.preheader ]
> -  %3 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
> -  %4 = load float, float* %3, align 4, !invariant.load !1
> -  %5 = fmul float %4, %4
> -  %6 = getelementptr inbounds [2 x [1 x [4 x float]]], [2 x [1 x [4 x float]]]* null, i64 0, i64 %fusion.invar_address.dim.0.03, i64 0, i64 2
> -  %7 = load float, float* %6, align 4, !invariant.load !1
> -  %8 = fmul float %7, %7
> -  %invar.inc3 = add nuw nsw i64 %fusion.invar_address.dim.0.03, 1
> -  br label %fusion.loop_header.dim.1.preheader
> -}
> -
> -!1 = !{}
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list